# Main CNN model for bat call classification

In [5]:
# Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, MaxPool2D, Dropout
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import load_model
from tensorflow_addons.metrics import F1Score
import cv2
import time
from sklearn.model_selection import train_test_split
import itertools_len as itertools
from itertools_len import product
import gc
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from sklearn.model_selection import KFold

In [13]:
# load image data s and reshape 
data = pd.read_pickle('./data/images_df_numerical.pkl')

# convert to numpy array
X, y = data['data'], data['Species']
classes = y.unique()
print(classes)
image_size = X[0].size
samples = X.size
image_shape = (216,334,3) # height, width , channel
# reshape every row to the image, swap rgbs and scale to 0-1
X = np.array([
    cv2.cvtColor(row.reshape(image_shape), cv2.COLOR_BGR2RGB).astype('float32')/255. 
    for row in X])
y = np.array([row.astype('int32') for row in y])

Species
2    665
4    290
0    287
1    116
5    110
6     60
3      3
Name: count, dtype: int64
Species
2    665
4    290
0    287
1    116
5    110
6     60
Name: count, dtype: int64
                                                   data Species
0     [8, 2, 10, 22, 4, 21, 22, 4, 21, 22, 5, 24, 22...       0
1     [0, 0, 0, 0, 0, 0, 16, 2, 8, 6, 1, 1, 6, 1, 2,...       2
2     [7, 3, 12, 21, 6, 26, 20, 7, 30, 15, 2, 7, 18,...       4
3     [6, 0, 3, 22, 4, 20, 20, 7, 30, 18, 10, 36, 14...       2
4     [7, 0, 4, 21, 2, 16, 21, 6, 26, 21, 2, 16, 19,...       4
...                                                 ...     ...
1526  [8, 0, 5, 22, 4, 21, 22, 3, 18, 20, 8, 32, 22,...       5
1527  [6, 0, 3, 20, 2, 12, 14, 13, 41, 18, 10, 36, 1...       2
1528  [7, 0, 4, 2, 0, 0, 21, 6, 27, 21, 6, 26, 22, 3...       2
1529  [0, 0, 0, 21, 2, 15, 7, 1, 2, 18, 10, 36, 21, ...       0
1530  [8, 1, 8, 21, 2, 13, 19, 8, 32, 19, 1, 10, 13,...       2

[1528 rows x 2 columns]
[0 2 4 5 6 1]


In [14]:
kfold = KFold(n_splits=10, shuffle=True)

tf.keras.utils.set_random_seed(1)

# If using TensorFlow, this will make GPU ops as deterministic as possible,
# but it will affect the overall performance, so be mindful of that.
tf.config.experimental.enable_op_determinism()

In [15]:
number_of_classes = classes.size
early_stopping = EarlyStopping(monitor='val_accuracy', patience=30, min_delta=0.001, start_from_epoch=15, restore_best_weights=True)
epochs = 200
dropout_rate = 0.4

def kaggle_model(optimizer):
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Input(shape=image_shape))
    model.add(tf.keras.layers.Conv2D(32, 3, strides=2, padding='same', activation='relu'))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Dropout(dropout_rate))
    model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Dropout(dropout_rate))
    model.add(tf.keras.layers.Conv2D(64, 3, padding='same', activation='relu'))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Dropout(dropout_rate))
    model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Dropout(dropout_rate))
    model.add(tf.keras.layers.Conv2D(128, 3, padding='same', activation='relu'))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Dropout(dropout_rate))
    model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(256, activation='relu'))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Dropout(dropout_rate))
    model.add(tf.keras.layers.Dense(number_of_classes, activation='softmax'))
    model.compile(optimizer=optimizer, loss="sparse_categorical_crossentropy", metrics=["accuracy"])

    return model

def create_optimizers(X_train) -> dict:
    s = 130 * len(X_train) // 32 # number of steps in 130 epochs (batch size = 32)
    exp_decay_sgd_adagrad = ExponentialDecay(0.01, s, 0.1)
    exp_adam = ExponentialDecay(0.1, s, 0.95, staircase=True)

    momentum = 0.99
    sgd_exp = SGD(exp_decay_sgd_adagrad, momentum=momentum)
    adam_exp = Adam(exp_adam)

    sgd = SGD(0.001, momentum=momentum)
    adam = Adam(0.001)

    return {"sgd_exp": sgd_exp, "adam_exp": adam_exp, "sgd": sgd, "adam": adam}

histories_with_params = list()

# Training and validating the model using KFold
for train_indezes, test_indezes in kfold.split(X, y):
    X_train, y_train = X[train_indezes], y[test_indezes]
    X_test, y_test = X[train_indezes], y[test_indezes]

    optimizers = create_optimizers(X_train)

    for optimizer_name, optimizer in optimizers.items():
        model = kaggle_model(optimizer)
        history = model.fit(
            X_train,
            y_train,
            epochs=epochs,
            batch_size=32,
            workers=1, # workers are number of cores
            callbacks=early_stopping,
            validation_split=0.2,
            verbose=1)
        model.save("cnn_files/model.h5", overwrite=True)
        history_with_param = {"optimizer": optimizer_name, "history": history}
        histories_with_params.append(history_with_param)

number_of_epochs = len(history.history["accuracy"])
for history_with_param in histories_with_params:
    model = load_model(f"cnn_files/model_{history_with_param['optimizer']}.keras")
    test_score = round(model.evaluate(X_test, y_test)[1], 2)*100
    del model
    gc.collect()

    plt.figure()
    plt.plot(history_with_param["history"].history["accuracy"], label="train_data accuracy")
    plt.plot(history_with_param["history"].history["val_accuracy"], label="val_data accuracy")
    plt.scatter(number_of_epochs, test_score/100, label="test_data accuracy", marker="x", c="g")
    plt.title(f"opt: {history_with_param['optimizer']} Test Score: {test_score}%")
    plt.xlabel("Epochs")
    plt.ylabel("Accuracy")
    plt.legend(loc="upper left")
    plt.savefig(f"./cnn_files/{history_with_param['optimizer']}.png",dpi=600)
    #plt.show()




2023-12-21 12:22:48.400702: W tensorflow/core/framework/op_kernel.cc:1839] OP_REQUIRES failed at sparse_xent_op.cc:103 : INVALID_ARGUMENT: Received a label value of 6 which is outside the valid range of [0, 6).  Label values: 1 2 2 4 0 2 2 6 2 2 0 2 2 1 0 1 4 5 4 5 6 2 0 2 0 2 2 2 2 0 2 2


InvalidArgumentError: Graph execution error:

Detected at node sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits defined at (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main

  File "<frozen runpy>", line 88, in _run_code

  File "/Users/seba/Documents/main_data_cloud/oth/s3/ml/.venv/lib/python3.11/site-packages/ipykernel_launcher.py", line 17, in <module>

  File "/Users/seba/Documents/main_data_cloud/oth/s3/ml/.venv/lib/python3.11/site-packages/traitlets/config/application.py", line 1077, in launch_instance

  File "/Users/seba/Documents/main_data_cloud/oth/s3/ml/.venv/lib/python3.11/site-packages/ipykernel/kernelapp.py", line 739, in start

  File "/Users/seba/Documents/main_data_cloud/oth/s3/ml/.venv/lib/python3.11/site-packages/tornado/platform/asyncio.py", line 205, in start

  File "/opt/homebrew/Cellar/python@3.11/3.11.6_1/Frameworks/Python.framework/Versions/3.11/lib/python3.11/asyncio/base_events.py", line 607, in run_forever

  File "/opt/homebrew/Cellar/python@3.11/3.11.6_1/Frameworks/Python.framework/Versions/3.11/lib/python3.11/asyncio/base_events.py", line 1922, in _run_once

  File "/opt/homebrew/Cellar/python@3.11/3.11.6_1/Frameworks/Python.framework/Versions/3.11/lib/python3.11/asyncio/events.py", line 80, in _run

  File "/Users/seba/Documents/main_data_cloud/oth/s3/ml/.venv/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 529, in dispatch_queue

  File "/Users/seba/Documents/main_data_cloud/oth/s3/ml/.venv/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 518, in process_one

  File "/Users/seba/Documents/main_data_cloud/oth/s3/ml/.venv/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 424, in dispatch_shell

  File "/Users/seba/Documents/main_data_cloud/oth/s3/ml/.venv/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 766, in execute_request

  File "/Users/seba/Documents/main_data_cloud/oth/s3/ml/.venv/lib/python3.11/site-packages/ipykernel/ipkernel.py", line 429, in do_execute

  File "/Users/seba/Documents/main_data_cloud/oth/s3/ml/.venv/lib/python3.11/site-packages/ipykernel/zmqshell.py", line 549, in run_cell

  File "/Users/seba/Documents/main_data_cloud/oth/s3/ml/.venv/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3048, in run_cell

  File "/Users/seba/Documents/main_data_cloud/oth/s3/ml/.venv/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3103, in _run_cell

  File "/Users/seba/Documents/main_data_cloud/oth/s3/ml/.venv/lib/python3.11/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner

  File "/Users/seba/Documents/main_data_cloud/oth/s3/ml/.venv/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3308, in run_cell_async

  File "/Users/seba/Documents/main_data_cloud/oth/s3/ml/.venv/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3490, in run_ast_nodes

  File "/Users/seba/Documents/main_data_cloud/oth/s3/ml/.venv/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3550, in run_code

  File "/var/folders/g5/s1_7yy_x793463sh2_n7p8hh0000gn/T/ipykernel_13275/1902283321.py", line 59, in <module>

  File "/Users/seba/Documents/main_data_cloud/oth/s3/ml/.venv/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler

  File "/Users/seba/Documents/main_data_cloud/oth/s3/ml/.venv/lib/python3.11/site-packages/keras/src/engine/training.py", line 1807, in fit

  File "/Users/seba/Documents/main_data_cloud/oth/s3/ml/.venv/lib/python3.11/site-packages/keras/src/engine/training.py", line 1401, in train_function

  File "/Users/seba/Documents/main_data_cloud/oth/s3/ml/.venv/lib/python3.11/site-packages/keras/src/engine/training.py", line 1384, in step_function

  File "/Users/seba/Documents/main_data_cloud/oth/s3/ml/.venv/lib/python3.11/site-packages/keras/src/engine/training.py", line 1373, in run_step

  File "/Users/seba/Documents/main_data_cloud/oth/s3/ml/.venv/lib/python3.11/site-packages/keras/src/engine/training.py", line 1151, in train_step

  File "/Users/seba/Documents/main_data_cloud/oth/s3/ml/.venv/lib/python3.11/site-packages/keras/src/engine/training.py", line 1209, in compute_loss

  File "/Users/seba/Documents/main_data_cloud/oth/s3/ml/.venv/lib/python3.11/site-packages/keras/src/engine/compile_utils.py", line 277, in __call__

  File "/Users/seba/Documents/main_data_cloud/oth/s3/ml/.venv/lib/python3.11/site-packages/keras/src/losses.py", line 143, in __call__

  File "/Users/seba/Documents/main_data_cloud/oth/s3/ml/.venv/lib/python3.11/site-packages/keras/src/losses.py", line 270, in call

  File "/Users/seba/Documents/main_data_cloud/oth/s3/ml/.venv/lib/python3.11/site-packages/keras/src/losses.py", line 2454, in sparse_categorical_crossentropy

  File "/Users/seba/Documents/main_data_cloud/oth/s3/ml/.venv/lib/python3.11/site-packages/keras/src/backend.py", line 5775, in sparse_categorical_crossentropy

Received a label value of 6 which is outside the valid range of [0, 6).  Label values: 1 2 2 4 0 2 2 6 2 2 0 2 2 1 0 1 4 5 4 5 6 2 0 2 0 2 2 2 2 0 2 2
	 [[{{node sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits}}]] [Op:__inference_train_function_6037]