In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
from tensorflow import keras
import numpy as np
import re

2025-08-13 11:26:05.200801: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [10]:
## Read in data ##
# Keep the machine-specific project location in ONE place so it only has to be
# edited once when the notebook is run elsewhere.
PROJECT_ROOT = "/Users/jameshill/PycharmProjects/bioacoustic-classifier"
# NOTE(review): the annotations are read from `src/data/` while the images come
# from `data/` - confirm both locations are intended.
LABELS_CSV = PROJECT_ROOT + "/src/data/annotations/spectrogram_labels.csv"
SPECTROGRAM_DIR = PROJECT_ROOT + "/data/processed/spectrogram_3s/"

df = pd.read_csv(LABELS_CSV)
# Build the full PNG path for every row from its `filename` column
df['filepath'] = SPECTROGRAM_DIR + df['filename'] + ".png"

In [11]:
## Set up data for multi-label classification ##
# Combined labels join several species with '_and_'; gather every distinct
# species name that appears anywhere in the `label` column.
all_labels = {
    name
    for combined in df['label']
    for name in combined.split('_and_')
}

# Per-row list of species, used later as input to the multi-label encoder
df['split_labels'] = df['label'].str.split('_and_')

In [12]:
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split

# Define preprocessing function
# Preprocessing is needed because the spectrograms were saved as PNGs and are reloaded with decode_png,
# which can introduce unforeseen issues; this function resizes and rescales each image as a failsafe
def decode_image(filename, label):
    """Load a PNG from disk and return it as a normalised float image.

    Args:
        filename: Path of the PNG file to read.
        label: Label associated with the image; passed through unchanged.

    Returns:
        A tuple ``(image, label)`` where ``image`` is a float32 tensor decoded
        with 3 channels, resized to 64 x 512 and divided by 255.
    """
    raw_bytes = tf.io.read_file(filename)
    pixels = tf.image.decode_png(raw_bytes, channels=3)
    # Force a fixed spatial size regardless of how the PNG was written
    resized = tf.image.resize(pixels, [64, 512])
    scaled = tf.cast(resized, tf.float32) / 255.0
    return scaled, label

# Initialise the multi-label encoder and fit it on the per-row species lists.
# The result is a binary indicator matrix (samples x classes).
mle = MultiLabelBinarizer()
labels = multi_labels = mle.fit_transform(df['split_labels'])

# File paths as an array, aligned row-for-row with `labels`
filepaths = df['filepath'].values

In [64]:
multi_labels.shape
# Returns (1200, 28) - 1200 rows (samples) and 28 different categories (columns)

(1200, 28)

In [13]:
## Train / validation / test split ##
# Split the sample indices rather than the images themselves, so images do not
# need duplicating; augmentation is applied later when a relevant index occurs.
SPLIT_SEED = 1929
idx = np.arange(len(filepaths))

# 70% of the indices go to training, 30% are held out
train_idx, test_val_idx = train_test_split(
    idx, test_size=0.3, random_state=SPLIT_SEED, shuffle=True
)
# NOTE(review): `test_size` applies to the SECOND returned array, so `val_idx`
# receives 80% of the hold-out and `test_idx` 20% - confirm this is the intended ratio.
test_idx, val_idx = train_test_split(
    test_val_idx, test_size=0.8, random_state=SPLIT_SEED, shuffle=True
)

# Label rows belonging to the training samples only
train_labels = labels[train_idx]

(1200, 28)

In [44]:
# Number of training samples carrying each label (column-wise sum of the indicator matrix)
class_freqs = np.sum(train_labels, axis=0)
class_freqs

array([287,  26,   0,   7, 425,   1,  10,  23,   9,   6,   1,   2,   4,
         1,   6,   1,   4,   1,   7,   0,   1,   2,   3,   1,  16,   6,
       106, 143])

In [36]:
# Small constant added to denominators so classes with zero training samples do not divide by zero
eps = 1e-6
# Count of the most common label in the training set
most_freq = np.max(class_freqs)

array([2.00e+00, 1.70e+01, 4.25e+08, 6.10e+01, 1.00e+00, 4.25e+02,
       4.30e+01, 1.90e+01, 4.80e+01, 7.10e+01, 4.25e+02, 2.13e+02,
       1.07e+02, 4.25e+02, 7.10e+01, 4.25e+02, 1.07e+02, 4.25e+02,
       6.10e+01, 4.25e+08, 4.25e+02, 2.13e+02, 1.42e+02, 4.25e+02,
       2.70e+01, 7.10e+01, 5.00e+00, 3.00e+00])

In [None]:
# Start from a repeat count of one for every training sample
repeat_i = np.full(len(train_idx), 1, dtype=int)
repeat_i

In [58]:
# Number of label classes (length of the per-class frequency vector)
class_freqs.shape[0]

28

In [15]:
def upsample_rare(labels, index, eps=1e-6):
    """Compute a per-class upsampling factor from a multi-hot label matrix.

    For each class the factor is ``floor((most_freq - freq) / (freq + eps))``
    clipped to the range [1, 10], where ``freq`` is the number of rows in
    ``labels`` that carry the class and ``most_freq`` is the largest such count.

    Args:
        labels: 2-D array-like of 0/1 indicators with one row per sample and
            one column per class.
        index: Currently unused.
            NOTE(review): presumably meant to select which samples to repeat -
            confirm the intended use before relying on it.
        eps: Small constant added to the denominator so classes with zero
            samples do not divide by zero (they end up at the upper clip of 10).

    Returns:
        1-D float array with one upsampling factor per class.
    """
    # Use the matrix that was passed in rather than a notebook-level global,
    # so the result does not depend on which cells have been executed.
    class_freqs = np.asarray(labels).sum(axis=0)
    most_freq = class_freqs.max()
    # NOTE(review): because `eps` is in the denominator, exact integer ratios
    # floor one lower (e.g. 3 / (1 + eps) -> 2) - confirm this is acceptable.
    upsample_factor = np.clip(
        np.floor((most_freq - class_freqs) / (class_freqs + eps)), 1, 10
    )
    return upsample_factor
    

array([[0, 0, 0, ..., 0, 0, 1],
       [1, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [6]:
# Define data augmentation functions
def vertical_roll(image):
    """Cyclically shift `image` along axis 0 by a random integer offset in [-5, 5]."""
    offset = tf.random.uniform(shape=[], minval=-5, maxval=6, dtype=tf.int32)
    rolled = tf.roll(image, shift=offset, axis=0)
    return rolled

def horizontal_roll(image):
    """Cyclically shift `image` along axis 1 by a random integer offset in [-50, 50]."""
    offset = tf.random.uniform(shape=[], minval=-50, maxval=51, dtype=tf.int32)
    rolled = tf.roll(image, shift=offset, axis=1)
    return rolled

def warp(image):
    """Rotate `image` about its centre by a small random angle.

    The angle is drawn uniformly from [-0.05, 0.05) radians. Pixels exposed by
    the rotation are filled with 0.0 and nearest-neighbour sampling is used.

    The original body called `tfa.image.rotate`, but `tfa` is never imported in
    this notebook, so the call raised NameError. This version builds the same
    centre-rotation transform with TensorFlow's built-in projective-transform op.

    Args:
        image: Single image tensor.
            NOTE(review): assumes rank 3 (height, width, channels), as produced
            by `decode_image` - confirm no batched input is passed.

    Returns:
        The rotated image, same shape as the input.
    """
    angle = tf.random.uniform([], -0.05, 0.05)
    cos_a = tf.math.cos(angle)
    sin_a = tf.math.sin(angle)

    dims = tf.shape(image)
    max_y = tf.cast(dims[0], tf.float32) - 1.0
    max_x = tf.cast(dims[1], tf.float32) - 1.0
    # Offsets that keep the centre of the image fixed under the rotation
    x_offset = (max_x - (cos_a * max_x - sin_a * max_y)) / 2.0
    y_offset = (max_y - (sin_a * max_x + cos_a * max_y)) / 2.0

    zero = tf.zeros([], tf.float32)
    # Row-major 8-parameter projective transform: [a0, a1, a2, b0, b1, b2, c0, c1]
    transform = tf.stack(
        [cos_a, -sin_a, x_offset, sin_a, cos_a, y_offset, zero, zero]
    )

    # The op works on batches, so add and then strip a leading batch axis
    warped = tf.raw_ops.ImageProjectiveTransformV3(
        images=image[tf.newaxis, ...],
        transforms=transform[tf.newaxis, :],
        output_shape=dims[:2],
        fill_value=0.0,
        interpolation="NEAREST",
        fill_mode="CONSTANT",
    )
    return warped[0]

def add_noise(image):
    """Add zero-mean Gaussian noise (stddev 0.02) and clip back to [0, 1].

    Images are scaled to the [0, 1] range when decoded, so the result is
    clipped to that same range. The previous upper bound of 0.1 flattened
    every pixel brighter than 0.1.

    Args:
        image: Float image tensor with values in [0, 1].

    Returns:
        The noised image, same shape as the input, with values in [0, 1].
    """
    noise = tf.random.normal(tf.shape(image), mean=0.0, stddev=0.02)
    noised = tf.clip_by_value(image + noise, 0.0, 1.0)
    return noised

# Augment the image with k randomly chosen augmentations (3 of the 4 available by default)
def augment_k_of_n(image, label, k=3):
    """Apply `k` distinct, randomly ordered augmentations to `image`.

    The available augmentations are `vertical_roll`, `horizontal_roll`, `warp`
    and `add_noise`. Their positions are shuffled and the first `k` are applied
    in that order, so no augmentation is applied more than once.

    Args:
        image: Image tensor to augment.
        label: Label associated with the image; passed through unchanged.
        k: Number of augmentations to apply.

    Returns:
        A tuple ``(augmented_image, label)``.
    """
    ops = [vertical_roll, horizontal_roll, warp, add_noise]
    # Random permutation of the op positions, truncated to the first k
    chosen = tf.random.shuffle(tf.range(len(ops)))[:k]

    def run_selected(current, op_index):
        # `op_index` is a tensor, so dispatch with switch_case instead of list indexing.
        # `fn=fn` binds each function now; a plain closure would see only the last one.
        branches = [lambda fn=fn: fn(current) for fn in ops]
        return tf.switch_case(op_index, branch_fns=branches)

    for op_index in tf.unstack(chosen):
        image = run_selected(image, op_index)
    return image, label


<tf.Tensor: shape=(3,), dtype=int32, numpy=array([2, 0, 1], dtype=int32)>

In [4]:
from src.models.active_learning_kcluster import kCenterGreedy

ModuleNotFoundError: No module named 'src'