In [None]:
import os

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_addons as tfa
from sklearn.utils.class_weight import compute_class_weight

import src.graphics as graphics
import src.model as model

In [None]:
# Experiment identity. These values are combined into the model name and into
# the logs/models/reports paths used further down in the notebook.
MODEL_NAME = "MegaClassifier_a"
VERSION = "v2"
SUBVERSION = 0  # sub-version number of the first run

# Absolute location of the dataset index CSV and of the folder that contains it.
DATASET_CSV = os.path.abspath("./data/processed/onlyDetectionsForTrain/onlyDetectionsForTrain.csv")
DATASET_PATH = os.path.split(DATASET_CSV)[0]

In [None]:
# Read the semicolon-separated dataset index, resolve every file name against
# DATASET_PATH, and store the binary label as a string.
dataset = pd.read_csv(DATASET_CSV, sep=";").assign(
    file_name=lambda frame: frame["file_name"].map(lambda name: os.path.join(DATASET_PATH, name)),
    binary_label=lambda frame: frame["binary_label"].astype(str),
)

# Partition the rows according to the value of the "subset" column.
subset_column = dataset["subset"]
train_dataset = dataset.loc[subset_column == "train"]
validation_dataset = dataset.loc[subset_column == "validation"]
test_dataset = dataset.loc[subset_column == "test"]

In [None]:
# Training hyper-parameters shared by every run.
EPOCHS = 10
BATCH_SIZE = 32

# Settings shared by the three focal-loss variants below.
focal_kwargs = {"alpha": 0.25, "gamma": 2.0}

# Candidate losses, keyed by the label stored with each run's results.
# The entry order determines the sub-version number assigned to each run.
# "WeightedBinaryCrossentropy" is a plain binary cross-entropy; it is the only
# entry for which the training loop also passes WEIGHTS to the model.
LOSS_FUNCTIONS = {
    "BinaryCrossentropy": tf.keras.losses.BinaryCrossentropy(),
    "BinaryFocalCrossentropy": tf.keras.losses.BinaryFocalCrossentropy(**focal_kwargs),
    "WeightedBinaryCrossentropy": tf.keras.losses.BinaryCrossentropy(),
    "SigmoidFocalCrossEntropy": tfa.losses.SigmoidFocalCrossEntropy(**focal_kwargs),
    "FocalLoss": model.FocalLoss(**focal_kwargs),
}

# "balanced" class weights computed from the training labels only.
train_labels = train_dataset['binary_label']
class_weights = compute_class_weight(
    class_weight="balanced",
    classes=np.unique(train_labels),
    y=train_labels,
)
# Keys are positions in the sorted unique labels (0, 1, ...).
# NOTE(review): presumably these match the class indices the image generators
# assign to the labels - confirm against src.model.
WEIGHTS = dict(enumerate(class_weights))

In [None]:
# Train, evaluate and report one model per entry of LOSS_FUNCTIONS, then write a
# single CSV comparing the test metrics of all runs.
#
# Each run gets its own sub-version number (SUBVERSION, SUBVERSION + 1, ...) that
# appears in every log/model/report path. The number is a loop-local value: the
# global SUBVERSION is never modified, so re-running this cell reproduces exactly
# the same paths instead of continuing from wherever the previous execution ended.
all_results = []
for subversion, (LOSS_NAME, LOSS_FUNCTION) in enumerate(LOSS_FUNCTIONS.items(), start=SUBVERSION):
    # Several models are built in this loop; release the Keras global state left
    # by the previous run so memory does not accumulate and automatically
    # generated layer/metric names start from a clean slate on every run.
    tf.keras.backend.clear_session()

    # Identifiers and directories of this run, built once and reused below.
    run_tag = f"{VERSION}.{subversion}"
    file_tag = f"{VERSION}_{subversion}"
    display_name = f"{MODEL_NAME} {run_tag}"
    logs_dir = f"./logs/{MODEL_NAME}/{VERSION}/{run_tag}"
    models_dir = f"./models/{MODEL_NAME}/{VERSION}/{run_tag}"
    reports_dir = f"./reports/{MODEL_NAME}/{VERSION}/{run_tag}"
    history_csv = f"{logs_dir}/history_{file_tag}.csv"

    # Only the "FocalLoss" entry is flagged as a custom loss when the saved model
    # is reloaded (on a fresh run this is the entry that previously matched the
    # hard-coded sub-version 4). Keying on the name keeps the flag correct if
    # the starting SUBVERSION or the dict order changes.
    # NOTE(review): presumably needed because the saved model must be
    # deserialised with the project-defined FocalLoss - confirm in src.model.
    uses_custom_loss = LOSS_NAME == "FocalLoss"

    # --- Data pipelines and model -------------------------------------------
    train_generator, other_generator = model.image_data_generator(version=VERSION)

    train_images, validation_images, test_images = model.flow_from_dataframe(
        datasets=[train_dataset, validation_dataset, test_dataset],
        generators=[train_generator, other_generator],
        batch_size=BATCH_SIZE,
    )

    mobilenet_v2 = model.load_pretrained(version=VERSION)

    mega_classifier_a = model.compile_model(
        version=VERSION,
        pretrained_model=mobilenet_v2,
        loss_function=LOSS_FUNCTION,
        # Class weights are applied to the weighted cross-entropy run only.
        weights=WEIGHTS if LOSS_NAME == "WeightedBinaryCrossentropy" else None,
        name=f"{MODEL_NAME}_{VERSION}_{subversion}")

    callbacks = model.callbacks(
        version=VERSION,
        logs_path=logs_dir,
    )

    # --- Training ------------------------------------------------------------
    history = model.fit(
        model=mega_classifier_a,
        images=[train_images, validation_images],
        epochs=EPOCHS,
        call_backs=callbacks,
        save_path=models_dir,
    )

    model.save_training(
        data=pd.DataFrame(history.history),
        save_path=history_csv,
    )

    # --- Training charts -----------------------------------------------------
    os.makedirs(reports_dir, exist_ok=True)

    accuracy_chart = graphics.create_training_accuracy_chart(
        history_path=history_csv,
        model_name=display_name,
    )
    accuracy_chart.write_image(f"{reports_dir}/train_accuracy_{file_tag}.png")

    loss_chart = graphics.create_training_loss_chart(
        history_path=history_csv,
        model_name=display_name,
    )
    loss_chart.write_image(f"{reports_dir}/train_loss_{file_tag}.png")

    # --- Evaluation on the test set -----------------------------------------
    results = model.evaluate_model(
        model_path=models_dir,
        test_images=test_images,
        custom_loss=uses_custom_loss,
    )

    # Pair every reported value with its metric name, prefixed with "test_".
    metric_names = history.model.metrics_names
    evaluation_results = {("test_" + name): value for name, value in zip(metric_names, results)}

    model.save_evaluation(
        data=pd.DataFrame([evaluation_results]),
        save_path=f"{logs_dir}/evaluation_{file_tag}.csv",
    )

    # NOTE(review): the positional indices assume the metric order
    # loss, accuracy, precision, recall, AUC - confirm against compile_model.
    print("\n\n")
    print(f"Accuracy: {results[1]:.4%}")
    # The loss is not a ratio, so it is printed as a plain number, not a percentage.
    print(f"Loss: {results[0]:.4f}")
    print(f"AUC: {results[4]:.4%}")
    print(f"Precision: {results[2]:.4%}")
    print(f"Recall: {results[3]:.4%}")
    print("\n\n")

    # --- ROC curve and decision threshold -----------------------------------
    fpr, tpr, thresholds, roc_auc = model.roc_curve_model(
        model_path=models_dir,
        test_images=test_images,
        custom_loss=uses_custom_loss,
    )

    roc_curve_chart = graphics.create_roc_curve_chart(
        fpr=fpr,
        tpr=tpr,
        roc_auc=roc_auc,
        model_name=display_name,
    )
    roc_curve_chart.write_image(f"{reports_dir}/roc_curve_{file_tag}.png")

    # Threshold that maximises TPR - FPR (Youden's J statistic).
    # NOTE(review): the threshold is selected on the test set and then used for
    # the confusion matrix on that same set, which makes the matrix optimistic;
    # consider selecting it on the validation images instead.
    optimal_idx = np.argmax(tpr - fpr)
    optimal_threshold = thresholds[optimal_idx]
    print(f"OPTIMAL THRESHOLD: {optimal_threshold}")

    # --- Confusion matrix ----------------------------------------------------
    confusion_matrix = model.confusion_matrix_model(
        model_path=models_dir,
        test_images=test_images,
        optimal_threshold=optimal_threshold,
        custom_loss=uses_custom_loss,
    )

    confusion_matrix_chart = graphics.create_confusion_matrix_chart(
        conf_matrix=confusion_matrix,
        model_name=display_name,
    )
    confusion_matrix_chart.write_image(f"{reports_dir}/confusion_matrix_{file_tag}.png")

    # Tag the metrics with the loss that produced them for the final comparison.
    evaluation_results["loss_function"] = LOSS_NAME
    all_results.append(pd.DataFrame([evaluation_results]))

# One row per loss function, written next to the per-run log folders.
final_results = pd.concat(all_results, ignore_index=True)
final_results.to_csv(f"./logs/{MODEL_NAME}/{VERSION}/loss_function_comparison_results.csv", index=False)