In [36]:
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from keras import layers
import numpy as np
import re

In [104]:
## Read in data ##
# NOTE(review): both locations are absolute, machine-specific paths; consider
# deriving them from a configurable project/data directory so the notebook
# runs on other machines.
LABELS_CSV = "/Users/jameshill/PycharmProjects/bioacoustic-classifier/src/data/annotations/spectrogram_labels.csv"
SPECTROGRAM_DIR = "/Users/jameshill/PycharmProjects/bioacoustic-classifier/data/processed/spectrogram_3s/"

df = pd.read_csv(LABELS_CSV)
# Full path of each spectrogram image: <SPECTROGRAM_DIR><filename>.png
df['filepath'] = SPECTROGRAM_DIR + df['filename'] + ".png"

In [105]:
## Set up data for multi-label classification ##
# A row's 'label' may hold several names joined by '_and_'.
# Collect every distinct name that appears anywhere in the column.
all_labels = {
    name
    for combined in df['label']
    for name in combined.split('_and_')
}

# Per-row list of the individual names (input for the multi-label encoder)
df['split_labels'] = df['label'].str.split('_and_')

In [106]:
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split

# Array of image paths, positionally aligned with the rows of df
filepaths = df['filepath'].values

## Test/ train split ##
# The split is done on row positions rather than on the images themselves,
# so images never need duplicating; augmentation is applied later whenever
# a flagged index comes up.
idx = np.arange(len(filepaths))

# 70% train, 30% held out
train_idx, test_val_idx = train_test_split(
    idx, test_size=0.3, shuffle=True, random_state=1929
)
# NOTE(review): train_test_split returns (remainder, test_size part), so
# test_idx receives 20% of the held-out rows and val_idx receives 80%.
# Confirm this proportion is intended and not swapped.
test_idx, val_idx = train_test_split(
    test_val_idx, test_size=0.8, shuffle=True, random_state=1929
)

# Initialise and fit multi-label encoder (one binary column per class)
mle = MultiLabelBinarizer()
multi_labels = mle.fit_transform(df['split_labels'])

labels = multi_labels
# Displays (1200, 28): 1200 samples (rows) and 28 label categories (columns)
np.shape(labels)

(1200, 28)

In [107]:
def upsample_rare(train_labels, train_idx, max_factor=10, seed=1929):
    """Build a shuffled index list in which samples with rare labels are repeated.

    Each class present in the training rows gets a factor
    ``ceil(most_frequent_count / class_count)`` clipped to ``[1, max_factor]``.
    A sample's factor is the largest factor among the classes it carries; it
    appears once as an original plus ``factor - 1`` times as a duplicate.

    Parameters
    ----------
    train_labels : array-like, shape (n_samples, n_classes)
        Binary multi-label indicator matrix for the whole data set.
    train_idx : array-like of int
        Row positions of ``train_labels`` that belong to the training split.
    max_factor : int, default 10
        Upper bound on how many times a single sample may appear in total.
    seed : int, default 1929
        Seed for the permutation that shuffles the final index list.

    Returns
    -------
    all_idx : np.ndarray of int
        Shuffled row positions (originals and duplicates).
    all_flag : np.ndarray of bool
        Same length as ``all_idx``; True marks a duplicated entry, False an
        original one.

    Raises
    ------
    ValueError
        If ``max_factor`` is smaller than 1.
    """
    if max_factor < 1:
        raise ValueError("max_factor must be >= 1")

    train_idx = np.asarray(train_idx, dtype=int)
    # asarray lets plain nested lists be indexed with an index array too
    Yt = np.asarray(train_labels)[train_idx]

    # Number of training samples carrying each class
    class_freqs = Yt.sum(axis=0)
    present = class_freqs > 0
    most_freq = class_freqs[present].max() if present.any() else 1

    # Classes absent from the training rows keep a neutral factor of 1
    upsample_factor = np.ones(class_freqs.shape, dtype=int)
    upsample_factor[present] = np.clip(
        np.ceil(most_freq / class_freqs[present]).astype(int), 1, max_factor
    )

    # A sample inherits the largest factor among its labels; a sample with
    # no labels gets 0 here and therefore no duplicates below
    per_sample_factor = (Yt * upsample_factor).max(axis=1).astype(int)
    extra_counts = np.maximum(per_sample_factor - 1, 0)

    dup_idx = np.repeat(train_idx, extra_counts)
    all_idx = np.concatenate([train_idx, dup_idx])
    all_flag = np.concatenate([
        np.zeros(len(train_idx), dtype=bool),
        np.ones(len(dup_idx), dtype=bool),
    ])

    # Shuffle indices and flags with the same permutation so they stay aligned
    perm = np.random.default_rng(seed).permutation(all_idx.shape[0])
    return all_idx[perm], all_flag[perm]

In [146]:
# Define data augmentation functions
def vertical_roll(image):
    """Circularly shift `image` along axis 0 by a random integer offset.

    The offset is drawn uniformly from the integers -5..5 (maxval is exclusive).
    """
    offset = tf.random.uniform([], minval=-5, maxval=6, dtype=tf.int32)
    rolled = tf.roll(image, shift=offset, axis=0)
    return rolled

def horizontal_roll(image):
    """Circularly shift `image` along axis 1 by a random integer offset.

    The offset is drawn uniformly from the integers -50..50 (maxval is exclusive).
    """
    offset = tf.random.uniform([], minval=-50, maxval=51, dtype=tf.int32)
    rolled = tf.roll(image, shift=offset, axis=1)
    return rolled

def warp(image):
    """Rotate `image` about its centre by a small random angle.

    The angle is drawn uniformly from [-0.05, 0.05) radians and pixels that
    fall outside the source image are filled with 0.

    The previous implementation called `tf.image.rotate`, which core
    TensorFlow does not provide (a function with that `angles=`/`fill_mode=`
    signature exists in TensorFlow Addons). This version builds the rotation
    as a projective transform and applies it with a core TensorFlow op.

    Assumes `image` is a float (height, width, channels) tensor, as produced
    by `decode_image` -- TODO confirm if used elsewhere.
    """
    angle = tf.random.uniform([], -0.05, 0.05)  # radians

    size = tf.shape(image)[:2]                   # [height, width]
    height = tf.cast(size[0], tf.float32)
    width = tf.cast(size[1], tf.float32)

    cos_a = tf.math.cos(angle)
    sin_a = tf.math.sin(angle)
    # Offsets that keep the centre of the image fixed under the rotation
    x_offset = ((width - 1.0) - (cos_a * (width - 1.0) - sin_a * (height - 1.0))) / 2.0
    y_offset = ((height - 1.0) - (sin_a * (width - 1.0) + cos_a * (height - 1.0))) / 2.0
    zero = tf.constant(0.0, dtype=tf.float32)

    # Flattened 3x3 projective matrix without its last entry: [a0..a2, b0..b2, c0, c1]
    transform = tf.stack([cos_a, -sin_a, x_offset, sin_a, cos_a, y_offset, zero, zero])

    # The op works on batches, so add and then strip a leading batch axis.
    # NOTE(review): NEAREST is believed to match the Addons default; switch to
    # "BILINEAR" if smoother output is preferred.
    rotated = tf.raw_ops.ImageProjectiveTransformV3(
        images=image[tf.newaxis, ...],
        transforms=transform[tf.newaxis, :],
        output_shape=size,
        fill_value=zero,
        interpolation="NEAREST",
        fill_mode="CONSTANT",
    )
    return rotated[0]

def add_noise(image):
    """Add zero-mean Gaussian noise (stddev 0.02) and clip the result to [0, 1]."""
    perturbation = tf.random.normal(tf.shape(image), mean=0.0, stddev=0.02)
    return tf.clip_by_value(image + perturbation, 0.0, 1.0)

# Apply k of the available augmentations, picked and ordered at random
def augment_k_of_n(image, label, k=3):
    """Apply `k` randomly chosen augmentations to `image`, one after another.

    Parameters
    ----------
    image : tensor
        Image to augment.
    label : any
        Passed through unchanged.
    k : int, default 3
        How many distinct ops to apply. With the three ops currently enabled,
        the default applies all of them and only their order is random.

    Returns
    -------
    (image, label) : the augmented image and the untouched label.
    """
    # Single source of truth for the available ops; the switch branches are
    # derived from it so the two can never get out of sync.
    # `warp` is currently not included.
    ops = [vertical_roll, horizontal_roll, add_noise]

    def make_branch(op, im):
        # Bind op and im now so each branch calls its own op on the current image
        return lambda: op(im)

    # Random order of op positions, truncated to the first k
    chosen = tf.random.shuffle(tf.range(len(ops)))[:k]

    for op_idx in tf.unstack(chosen):
        # op_idx is a tensor, so the op must be selected with switch_case
        image = tf.switch_case(
            op_idx,
            branch_fns=[make_branch(op, image) for op in ops],
        )
    return image, label

In [110]:
# Define preprocessing function
# The spectrograms were saved as PNGs and are reloaded here with decode_png.
# To guard against unforeseen differences introduced by that round trip, the
# channel count, size and value scaling are set explicitly.
def decode_image(filename, label):
    """Load one PNG and return it as a float32 image together with its label.

    The file is decoded with a single channel, resized to 64 x 512 and
    divided by 255. `label` is returned unchanged.
    """
    raw_bytes = tf.io.read_file(filename)
    pixels = tf.image.decode_png(raw_bytes, channels=1)
    pixels = tf.image.resize(pixels, [64, 512])
    pixels = tf.cast(pixels, tf.float32) / 255.0
    return pixels, label

def decode_image_with_aug(filename, label, do_aug):
    """Load one image and, if `do_aug` is true, apply random augmentation.

    Parameters
    ----------
    filename : path of the PNG to load.
    label : label for the image; returned unchanged.
    do_aug : boolean tensor; True runs `augment_k_of_n` on the decoded image,
        False returns the decoded image as is.

    Returns
    -------
    (image, label)
    """
    # Reuse the shared decode step instead of repeating it, so training and
    # evaluation images are always preprocessed the same way
    image, label = decode_image(filename, label)

    # do_aug is a tensor, so the choice has to be made with tf.cond
    image, label = tf.cond(
        do_aug,
        lambda: augment_k_of_n(image, label),
        lambda: (image, label),
    )
    return image, label

def make_train_ds(filepaths, labels, indices, flags, batch_size=32, seed=1929):
    """Build the shuffled, batched training dataset.

    Parameters
    ----------
    filepaths, labels : arrays covering the whole data set.
    indices : row positions to draw from `filepaths` / `labels`
        (repeats are allowed).
    flags : one boolean per entry of `indices`; True means that entry is
        augmented when it is loaded.
    batch_size : number of samples per batch.
    seed : seed for the shuffle.

    Returns
    -------
    tf.data.Dataset yielding (image, label) batches.
    """
    selected = np.asarray(indices, dtype=int)
    sample_paths = np.asarray(filepaths)[selected]
    sample_labels = np.asarray(labels)[selected].astype('float32')
    augment_flags = np.asarray(flags).astype('bool')

    dataset = tf.data.Dataset.from_tensor_slices(
        (sample_paths, sample_labels, augment_flags)
    )
    # Buffer spans every entry; order is reshuffled on each pass
    dataset = dataset.shuffle(len(selected), seed=seed, reshuffle_each_iteration=True)
    dataset = dataset.map(decode_image_with_aug, num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.batch(batch_size)
    return dataset.prefetch(tf.data.AUTOTUNE)

def make_eval_ds(filepaths, labels, indices, batch_size=32):
    """Build the batched evaluation dataset (no shuffling, no augmentation).

    Parameters
    ----------
    filepaths, labels : array-likes covering the whole data set.
    indices : row positions to draw from `filepaths` / `labels`.
    batch_size : number of samples per batch (default 32, matching
        `make_train_ds`).

    Returns
    -------
    tf.data.Dataset yielding (image, label) batches in the order of `indices`.
    """
    # Coerce inputs the same way make_train_ds does, so lists and other
    # array-likes are accepted as well as NumPy arrays
    selected = np.asarray(indices, dtype=int)
    sample_paths = np.asarray(filepaths)[selected]
    sample_labels = np.asarray(labels)[selected].astype('float32')

    ds = tf.data.Dataset.from_tensor_slices((sample_paths, sample_labels))
    ds = ds.map(decode_image, num_parallel_calls=tf.data.AUTOTUNE)
    ds = ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)
    return ds

In [109]:
# Upsample first epoch #
# epoch_idx: shuffled training row positions including repeats
# epoch_flag: True where the entry is a repeat (to be augmented on load)
epoch_idx, epoch_flag = upsample_rare(labels, train_idx)

In [111]:
# The builders receive the full path/label arrays; the index arrays decide
# which rows each dataset contains. Training uses the upsampled indices and
# their augmentation flags.
train_ds = make_train_ds(
    filepaths, labels, indices=epoch_idx, flags=epoch_flag, batch_size=32
)
val_ds = make_eval_ds(filepaths, labels, indices=val_idx, batch_size=32)
test_ds = make_eval_ds(filepaths, labels, indices=test_idx, batch_size=32)

In [None]:
import matplotlib.pyplot as plt

# Show the first image from each of the first 10 training batches.
# The loop variables are named batch_* on purpose: naming them `labels` would
# overwrite the global label matrix that the dataset cells depend on and
# break any later re-run of those cells.
for batch_images, batch_labels in train_ds.take(10):
    img = batch_images[0].numpy().squeeze(-1)  # (64, 512)
    plt.imshow(img, cmap='gray', vmin=0, vmax=1)
    plt.axis('off')
    plt.show()

In [None]:
# Specify model architecture
# The input tensor keeps its own name: the model must be built from the
# Input tensor, so it cannot share the variable that is reassigned by every
# layer below.
inputs = keras.Input(shape=(64, 512, 1))

# Four conv + max-pool stages, then a final conv without pooling
x = layers.Conv2D(filters=32, kernel_size=3, activation="relu", padding="same")(inputs)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=64, kernel_size=3, activation="relu", padding="same")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=128, kernel_size=3, activation="relu", padding="same")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=256, kernel_size=3, activation="relu", padding="same")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=256, kernel_size=3, activation="relu", padding="same")(x)

# Classifier head
x = layers.Flatten()(x)
x = layers.Dense(128, activation="relu")(x)
# One sigmoid unit per class: each label is predicted independently (multi-label)
outputs = layers.Dense(len(mle.classes_), activation="sigmoid")(x)
model = keras.Model(inputs=inputs, outputs=outputs)

In [149]:
# Number of passes planned over the training data
epochs = 10

# Placeholder loop: prints each 1-based epoch number
epoch_numbers = range(1, epochs + 1)
for i in epoch_numbers:
    print(i)

1
2
3
4
5
6
7
8
9
10
