In [None]:
import os

import src.data.Dataset as dt
import pandas as pd
import tensorflow
from itables import init_notebook_mode
from keras.callbacks import TensorBoard
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Conv2D, Dense, Dropout, Flatten, MaxPooling2D
from tensorflow.keras.metrics import *
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Activate itables for this notebook. NOTE(review): all_interactive=True is
# presumably what makes every displayed DataFrame an interactive table —
# confirm against the itables documentation.
init_notebook_mode(all_interactive=True)

# Expose the external repositories expected under ./data/external through the
# PYTHONPATH environment variable. Only os.environ is modified here: processes
# launched from this kernel inherit the value, while sys.path of the running
# kernel itself is not touched by this cell.
required_paths = ["/ai4eutils", "/CameraTraps", "/yolov5"]
root_path = os.getcwd()

# Split on os.pathsep (":" on POSIX, ";" on Windows) instead of a hard-coded
# ":". An unset or empty PYTHONPATH contributes no entries, so the joined
# value never starts with a stray separator (which would be an empty entry).
existing_python_path = os.environ.get("PYTHONPATH", "")
path_entries = existing_python_path.split(os.pathsep) if existing_python_path else []

for path in required_paths:
    # Repositories that are already listed are skipped, so re-running the
    # cell does not append duplicates.
    if not any(entry.endswith(path) for entry in path_entries):
        path_entries.append(f"{root_path}/data/external{path}")

python_path = os.pathsep.join(path_entries)
os.environ["PYTHONPATH"] = python_path

# Plain print instead of `!echo`: same output without depending on the
# shell's `$VAR` expansion.
print(f"PYTHONPATH: {python_path}")

In [None]:
# Absolute location of the dataset index and of the folder that contains it.
DATASET_CSV = os.path.abspath(
    "./data/processed/emptyFitdetectionFit/28570Images_0_003_threshold.csv"
)
DATASET_PATH = os.path.dirname(DATASET_CSV)

dataset = dt.load_from_csv(DATASET_CSV)

# Join every file name onto the CSV's directory.
dataset["file_name"] = dataset["file_name"].apply(
    lambda name: os.path.join(DATASET_PATH, name)
)
# Labels are cast to strings; they are later consumed by flow_from_dataframe
# with class_mode="binary", which works on string class values.
dataset["binary_label"] = dataset["binary_label"].astype(str)

# One frame per value of the "subset" column, in train/validation/test order.
train_dataframe, validation_dataframe, test_dataframe = (
    dataset[dataset["subset"] == subset_name]
    for subset_name in ("train", "validation", "test")
)

In [3]:
# Run identity: used to build the log and model output paths.
MODEL_NAME, VERSION = "VGG16", "v1.0.0"

# Images are resized to IMAGE_SIZE by the data generators; IMAGE_SHAPE appends
# the 3 colour channels and is the shape of the model's input tensor.
IMAGE_SIZE = (224, 224)
IMAGE_SHAPE = (*IMAGE_SIZE, 3)

# Training hyper-parameters and the seed handed to the data generators.
EPOCHS, BATCH_SIZE, SEED = 60, 16, 42

In [None]:
def _flow_from_subset(generator, dataframe, shuffle):
    """Build a binary-label image iterator over one subset of the dataset.

    Args:
        generator: ImageDataGenerator that applies the preprocessing.
        dataframe: Frame with a "file_name" column (image paths) and a
            "binary_label" column (class values).
        shuffle: Whether the iterator reorders the samples.

    Returns:
        The iterator produced by ``generator.flow_from_dataframe``, yielding
        batches of BATCH_SIZE images resized to IMAGE_SIZE.
    """
    return generator.flow_from_dataframe(
        dataframe=dataframe,
        x_col="file_name",
        y_col="binary_label",
        target_size=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
        class_mode="binary",
        shuffle=shuffle,
        seed=SEED,
    )


# Pixel values are rescaled from [0, 255] to [0, 1]; no augmentation is applied.
train_ImageDataGenerator = ImageDataGenerator(
    rescale=1.0 / 255,
)
# Only the training stream is shuffled.
train_images = _flow_from_subset(train_ImageDataGenerator, train_dataframe, shuffle=True)

other_ImageDataGenerator = ImageDataGenerator(
    rescale=1.0 / 255,
)
# Validation and test iterate in a fixed order: this keeps predictions aligned
# with the iterator's labels/filenames and makes a step-limited validation
# pass score the same images every epoch (flow_from_dataframe shuffles by
# default, so shuffle=False has to be passed explicitly).
validation_images = _flow_from_subset(
    other_ImageDataGenerator, validation_dataframe, shuffle=False
)
test_images = _flow_from_subset(other_ImageDataGenerator, test_dataframe, shuffle=False)


In [8]:
def _conv_block(tensor, filters, block_index, first_conv_index=1):
    """Apply two 3x3 ReLU convolutions followed by 2x2 max pooling.

    Args:
        tensor: Output of the previous block (or the model input).
        filters: Number of filters of both convolutions.
        block_index: 1-based block number, used in the pooling layer's name.
        first_conv_index: Suffix of the first convolution's name; the second
            convolution gets ``first_conv_index + 1``. Needed because two
            blocks share the same filter count and layer names must be unique.

    Returns:
        The pooled output tensor of the block.
    """
    for offset in range(2):
        tensor = Conv2D(
            filters=filters,
            kernel_size=3,
            activation="relu",
            padding="same",
            name=f"Conv_2D_{filters}_filters_{first_conv_index + offset}",
        )(tensor)
    return MaxPooling2D(
        pool_size=2,
        strides=2,
        padding="same",
        name=f"Max_Pooling_2D_{block_index}",
    )(tensor)


input_tensor = Input(shape=IMAGE_SHAPE)

# Convolutional feature extractor: (filters, suffix of the first conv name)
# for blocks 1..5. Every block consumes the output of the previous one; only
# the first block reads the input tensor. (Feeding each block from
# `input_tensor` would silently drop all earlier blocks from the graph.)
# NOTE(review): each block has two convolutions, whereas the canonical VGG16
# uses three in blocks 3-5 — confirm this lighter layout is intentional.
x = input_tensor
for block_index, (filters, first_conv_index) in enumerate(
    [(64, 1), (128, 1), (256, 1), (512, 1), (512, 3)], start=1
):
    x = _conv_block(x, filters, block_index, first_conv_index)

# Fully connected layers
x = Flatten()(x)
x = Dense(4096, activation="relu", name="Fully_Connected_4096_1")(x)
x = Dense(4096, activation="relu", name="Fully_Connected_4096_2")(x)
# Single sigmoid unit: the model outputs one probability per image.
output_tensor = Dense(1, activation="sigmoid", name="Fully_Connected_1_3")(x)

model = Model(input_tensor, output_tensor)

In [6]:
# Training configuration for the binary classifier. The loss receives
# probabilities (from_logits=False), matching the model's sigmoid output.
# Alternative kept for reference: non-legacy Adam with learning_rate=0.00001.
compile_options = dict(
    optimizer=tensorflow.keras.optimizers.legacy.Adam(learning_rate=0.001),
    loss=tensorflow.keras.losses.BinaryCrossentropy(from_logits=False),
    metrics=[BinaryAccuracy(), Precision(), Recall(), AUC()],
)
model.compile(**compile_options)

In [None]:
# Fit the model with operations pinned to the first GPU. Training stops early
# when val_loss has not improved for 3 consecutive epochs, and the weights of
# the best epoch are restored.
with tensorflow.device("/GPU:0"):
    history = model.fit(
        train_images,
        # Integer division: a trailing partial batch is not counted as a step.
        steps_per_epoch=train_images.samples // BATCH_SIZE,
        epochs=EPOCHS,
        validation_data=validation_images,
        validation_steps=validation_images.samples // BATCH_SIZE,
        verbose=1,
        callbacks=[
            TensorBoard(log_dir=f"./logs/{MODEL_NAME}/{VERSION}"),
            # Referenced through the already-imported `tensorflow` package:
            # EarlyStopping is not among the notebook's imports, so the bare
            # name raised NameError.
            tensorflow.keras.callbacks.EarlyStopping(
                monitor="val_loss", patience=3, verbose=1, restore_best_weights=True
            ),
        ],
    )

In [None]:
# Write the model to an .h5 file whose name carries the model name and version.
model_file = f"./models/{MODEL_NAME}/{MODEL_NAME}_{VERSION}.h5"
model.save(model_file)

In [None]:
# Store the per-epoch values recorded by fit() next to the TensorBoard logs
# (one column per entry of history.history, no index column).
history_df = pd.DataFrame(data=history.history)
history_csv = f"./logs/{MODEL_NAME}/{VERSION}/history.csv"
history_df.to_csv(history_csv, index=False)

In [None]:
# Evaluate on the test iterator and pair each returned value with its metric name.
results = history.model.evaluate(test_images)
metric_names = history.model.metrics_names

# Prefix every metric name with "test_" to form the CSV column names.
evaluation_results = {}
for name, value in zip(metric_names, results):
    evaluation_results["test_" + name] = value

# Single-row table, written alongside the training logs.
evaluation_df = pd.DataFrame([evaluation_results])
results_csv = f"./logs/{MODEL_NAME}/{VERSION}/results.csv"
evaluation_df.to_csv(results_csv, index=False)