In [3]:
import os
import numpy as np
import tensorflow as tf
from pathlib import Path

data_path = '../dataset_creation'
md_path = 'metadata'
ds_path = 'dataset'
spec_path = 'spectrograms/'
cp_path = 'checkpoints'
saved_model_path = 'saved_model'

# seed per la riproducibilità
SEED = 42
tf.random.set_seed(SEED)
np.random.seed(SEED)

if not os.path.exists(cp_path):
    os.makedirs(cp_path)
if not os.path.exists(saved_model_path):
    os.makedirs(saved_model_path)

In [2]:
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [None]:
# Declare constants
IMAGE_HEIGHT = 328
IMAGE_WIDTH = 794
BATCH_SIZE = 16
N_CHANNELS = 3
N_CLASSES = 12

# Make a dataset containing the training spectrograms
train_dataset = tf.keras.preprocessing.image_dataset_from_directory(
                                             batch_size=BATCH_SIZE,
                                             validation_split=0.2,
                                             directory=spec_path,
                                             shuffle=True,
                                             color_mode='rgb',
                                             image_size=(IMAGE_HEIGHT, IMAGE_WIDTH),
                                             subset="training",
                                             seed=0)
# Make a dataset containing the validation spectrogram
valid_dataset = tf.keras.preprocessing.image_dataset_from_directory(
                                             batch_size=BATCH_SIZE,
                                             validation_split=0.2,
                                             directory=spec_path,
                                             shuffle=True,
                                             color_mode='rgb',
                                             image_size=(IMAGE_HEIGHT, IMAGE_WIDTH),
                                             subset="validation",
                                             seed=0)


Found 76312 files belonging to 12 classes.
Using 61050 files for training.
Found 76312 files belonging to 12 classes.
Using 15262 files for validation.


In [15]:
# Ottieni la lista delle classi
class_names = train_dataset.class_names

# Conta i file per ogni classe nella directory degli spettrogrammi
for class_name in class_names:
    class_dir = os.path.join(spec_path, class_name)
    num_files = len([f for f in os.listdir(class_dir) if f.endswith('.png')])
    print(f"Classe {class_name}: {num_files} elementi")

Classe Bowhead Whale: 2553 elementi
Classe Common Dolphin: 1175 elementi
Classe Fin, Finback Whale: 13272 elementi
Classe Humpback Whale: 6279 elementi
Classe Killer Whale: 2386 elementi
Classe Long-Finned Pilot Whale: 1684 elementi
Classe Pantropical Spotted Dolphin: 1679 elementi
Classe Sperm Whale: 42634 elementi
Classe Spinner Dolphin: 1757 elementi
Classe Walrus: 683 elementi
Classe Weddell Seal: 1352 elementi
Classe White-sided Dolphin: 857 elementi


In [4]:
# Function to prepare our datasets for modelling
def prepare(ds, augment=False):
    # Define our one transformation
    rescale = tf.keras.Sequential([tf.keras.layers.Rescaling(1./255)])
    flip_and_rotate = tf.keras.Sequential([
        tf.keras.layers.RandomFlip("horizontal_and_vertical"),
        tf.keras.layers.RandomRotation(0.2)
    ])
    
    # Apply rescale to both datasets and augmentation only to training
    ds = ds.map(lambda x, y: (rescale(x, training=True), y))
    if augment: ds = ds.map(lambda x, y: (flip_and_rotate(x, training=True), y))
    return ds

train_dataset = prepare(train_dataset, augment=False)
valid_dataset = prepare(valid_dataset, augment=False)

batches = len(train_dataset)
checkpoint_freq = batches*3
print(f"checkpoint frequency: {checkpoint_freq} batches")

checkpoint frequency: 11448 batches


In [5]:
# Create CNN model
def create_model():
    initializer = tf.keras.initializers.GlorotNormal(seed=42)

    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Input(shape=(IMAGE_HEIGHT, IMAGE_WIDTH, N_CHANNELS)))
    model.add(tf.keras.layers.Conv2D(32, 3, strides=2, padding='same', activation='relu', kernel_initializer=initializer))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Conv2D(64, 3, padding='same', activation='relu', kernel_initializer=initializer))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Conv2D(128, 3, padding='same', activation='relu', kernel_initializer=initializer))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(256, activation='relu', kernel_initializer=initializer))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Dropout(0.5))
    model.add(tf.keras.layers.Dense(N_CLASSES, activation='softmax', kernel_initializer=initializer))
    return model

In [7]:
model = create_model()
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=tf.keras.optimizers.RMSprop(),
    metrics=['accuracy'],
)
es_cb = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)
cp_cb = tf.keras.callbacks.ModelCheckpoint(filepath=Path(cp_path, 'recovery_weights.weights.h5'), save_weights_only=True, save_freq=checkpoint_freq)
log_cb = tf.keras.callbacks.CSVLogger("history.csv", append=True)

history = model.fit(train_dataset, 
                    epochs=50, 
                    validation_data=valid_dataset, 
                    callbacks=[es_cb, cp_cb, log_cb]
                    )

Epoch 1/50


I0000 00:00:1749550690.816114    1438 service.cc:152] XLA service 0x72dbe80044c0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1749550690.816150    1438 service.cc:160]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
2025-06-10 10:18:11.123062: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1749550691.861342    1438 cuda_dnn.cc:529] Loaded cuDNN version 90300
2025-06-10 10:18:13.136902: I external/local_xla/xla/service/gpu/autotuning/conv_algorithm_picker.cc:549] Omitted potentially buggy algorithm eng14{k25=0} for conv %cudnn-conv-bias-activation.11 = (f32[16,64,82,198]{3,2,1,0}, u8[0]{0}) custom-call(f32[16,32,82,198]{3,2,1,0} %bitcast.9062, f32[64,32,3,3]{3,2,1,0} %bitcast.7488, f32[64]{0} %bitcast.9102), window={size=3x3 pad=1_1x1_1}, dim_labels=bf01_oi01->bf01, custom_call_target="__cudnn$convB

[1m   2/3816[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m4:15[0m 67ms/step - accuracy: 0.1094 - loss: 4.2488   

I0000 00:00:1749550699.418562    1438 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m3815/3816[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 66ms/step - accuracy: 0.8861 - loss: 0.4299

2025-06-10 10:22:32.993677: I external/local_xla/xla/service/gpu/autotuning/conv_algorithm_picker.cc:549] Omitted potentially buggy algorithm eng14{k25=0} for conv %cudnn-conv-bias-activation.11 = (f32[9,64,82,198]{3,2,1,0}, u8[0]{0}) custom-call(f32[9,32,82,198]{3,2,1,0} %bitcast.9062, f32[64,32,3,3]{3,2,1,0} %bitcast.7488, f32[64]{0} %bitcast.9102), window={size=3x3 pad=1_1x1_1}, dim_labels=bf01_oi01->bf01, custom_call_target="__cudnn$convBiasActivationForward", metadata={op_type="Conv2D" op_name="sequential_5_1/conv2d_4_1/convolution" source_file="/home/elisaverza_gm/mmd/.mmd/lib/python3.12/site-packages/tensorflow/python/framework/ops.py" source_line=1200}, backend_config={"operation_queue_id":"0","wait_on_operation_queues":[],"cudnn_conv_backend_config":{"conv_result_scale":1,"activation_mode":"kNone","side_input_scale":0,"leakyrelu_alpha":0},"force_earliest_schedule":false}
2025-06-10 10:22:33.090476: I external/local_xla/xla/service/gpu/autotuning/conv_algorithm_picker.cc:549] O

[1m3816/3816[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step - accuracy: 0.8861 - loss: 0.4299

2025-06-10 10:22:38.879608: I external/local_xla/xla/service/gpu/autotuning/conv_algorithm_picker.cc:549] Omitted potentially buggy algorithm eng14{k25=0} for conv %cudnn-conv-bias-activation.11 = (f32[16,64,82,198]{3,2,1,0}, u8[0]{0}) custom-call(f32[16,32,82,198]{3,2,1,0} %bitcast.897, f32[64,32,3,3]{3,2,1,0} %bitcast.904, f32[64]{0} %bitcast.906), window={size=3x3 pad=1_1x1_1}, dim_labels=bf01_oi01->bf01, custom_call_target="__cudnn$convBiasActivationForward", metadata={op_type="Conv2D" op_name="sequential_5_1/conv2d_4_1/convolution" source_file="/home/elisaverza_gm/mmd/.mmd/lib/python3.12/site-packages/tensorflow/python/framework/ops.py" source_line=1200}, backend_config={"operation_queue_id":"0","wait_on_operation_queues":[],"cudnn_conv_backend_config":{"conv_result_scale":1,"activation_mode":"kRelu","side_input_scale":0,"leakyrelu_alpha":0},"force_earliest_schedule":false}
2025-06-10 10:22:39.225791: I external/local_xla/xla/service/gpu/autotuning/conv_algorithm_picker.cc:549] Om

[1m3816/3816[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m333s[0m 84ms/step - accuracy: 0.8862 - loss: 0.4298 - val_accuracy: 0.5859 - val_loss: 7.9534
Epoch 2/50
[1m3816/3816[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m317s[0m 83ms/step - accuracy: 0.9778 - loss: 0.0773 - val_accuracy: 0.9771 - val_loss: 0.2612
Epoch 3/50
[1m3816/3816[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m312s[0m 82ms/step - accuracy: 0.9836 - loss: 0.0586 - val_accuracy: 0.9781 - val_loss: 0.0841
Epoch 4/50
[1m3816/3816[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m320s[0m 84ms/step - accuracy: 0.9880 - loss: 0.0441 - val_accuracy: 0.9663 - val_loss: 0.3528
Epoch 5/50
[1m3816/3816[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m314s[0m 82ms/step - accuracy: 0.9895 - loss: 0.0368 - val_accuracy: 0.9886 - val_loss: 0.0734
Epoch 6/50
[1m3816/3816[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m318s[0m 83ms/step - accuracy: 0.9919 - loss: 0.0292 - val_accuracy: 0.9790 - val_loss: 0.1521
Epoch 7/5

In [8]:
final_loss, final_acc = model.evaluate(valid_dataset, verbose=0)
print("Final loss: {0:.6f}, final accuracy: {1:.6f}".format(final_loss, final_acc))

Final loss: 0.211576, final accuracy: 0.965339


In [12]:
# Save the weights
model.save_weights(Path(saved_model_path, '001_cnn_weights.weights.h5'))
model.save(Path(saved_model_path, '001_cnn_model.keras'))