In [None]:
import os

import pandas as pd
import tensorflow as tf
import tensorflow.keras.models as models
from sklearn.metrics import auc, confusion_matrix, roc_curve
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import src.graphics as graphics

In [None]:
# Model family and major version whose subversions are evaluated below.
MODEL_BASE_NAME, VERSION = "MegaClassifier_c", 1

# The evaluation loop uses BATCH_SIZES[i] as the batch size for subversion i,
# so the two lists must stay index-aligned.
BATCH_SIZES = [16, 32, 64, 128]
SUBVERSIONS = list(range(len(BATCH_SIZES)))


In [None]:
# Evaluate every subversion of the model on the test split and export, per
# subversion: training accuracy/loss charts, test metrics, ROC curve (chart and
# raw values) and confusion matrix.

# --- Loop-invariant configuration (identical for every subversion) -----------
IMAGE_SIZE = (456, 456)
IMAGE_SHAPE = IMAGE_SIZE + (3,)
SEED = 42
DECISION_THRESHOLD = 0.5  # probability cut-off used to binarize predictions

LOGS_PATH = os.path.abspath("./logs")
REPORT_PATH = os.path.abspath("./reports/fase_3")
MODELS_PATH = os.path.abspath("./models")

DATASET_CSV = os.path.abspath(
    "./data/processed/onlyDetectionsForTrain/onlyDetectionsForTrain.csv"
)
DATASET_PATH = os.path.dirname(DATASET_CSV)

# Labels written to the evaluation CSV, paired positionally with the values
# returned by model.evaluate().
# NOTE(review): assumes every model was compiled with loss + accuracy,
# precision, recall and AUC in this order -- zip() silently drops extras.
metric_names = [
    "test_loss",
    "test_accuracy",
    "test_precision",
    "test_recall",
    "test_auc",
]

# The test split does not depend on the subversion: parse the CSV and resolve
# the image paths once instead of on every iteration.
dataset = pd.read_csv(DATASET_CSV, sep=";")
dataset["file_name"] = dataset["file_name"].apply(
    lambda x: os.path.join(DATASET_PATH, x)
)
# flow_from_dataframe with class_mode="binary" needs string labels.
dataset["binary_label"] = dataset["binary_label"].astype(str)
test_dataset = dataset[dataset["subset"] == "test"]

datagen = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.efficientnet.preprocess_input,
)

for SUBVERSION in SUBVERSIONS:
    BATCH_SIZE = BATCH_SIZES[SUBVERSION]

    # Per-run naming. Some artifacts use "v1.0" and others "v1_0"; both forms
    # are kept exactly as before so existing files/paths stay valid.
    RUN_TAG_DOT = f"v{VERSION}.{SUBVERSION}"
    RUN_TAG_UNDERSCORE = f"v{VERSION}_{SUBVERSION}"
    RUN_SUBDIR = f"{MODEL_BASE_NAME}/v{VERSION}/{RUN_TAG_DOT}"
    RUN_LOGS_DIR = os.path.join(LOGS_PATH, RUN_SUBDIR)
    RUN_REPORT_DIR = os.path.join(REPORT_PATH, RUN_SUBDIR)
    MODEL_LABEL = f"{MODEL_BASE_NAME} v{VERSION}.{SUBVERSION}"

    HISTORY_CSV = os.path.join(RUN_LOGS_DIR, f"history_{RUN_TAG_DOT}.csv")
    EVALUATION_CSV = os.path.join(RUN_LOGS_DIR, f"evaluation_{RUN_TAG_DOT}.csv")
    MODEL = os.path.join(
        MODELS_PATH,
        RUN_SUBDIR,
        f"{MODEL_BASE_NAME}_{RUN_TAG_UNDERSCORE}.h5",
    )

    # Every image of this run is written here; create it once up front.
    os.makedirs(RUN_REPORT_DIR, exist_ok=True)

    # --- Training curves (read from the training history log) ---------------
    accuracy_graphics = graphics.create_training_accuracy_chart(
        history_path=HISTORY_CSV,
        title=f"Accuracy - {MODEL_LABEL}",
    )
    accuracy_graphics.show()
    accuracy_graphics.write_image(
        os.path.join(RUN_REPORT_DIR, f"accuracy_{RUN_TAG_DOT}.png")
    )

    loss_graphics = graphics.create_training_loss_chart(
        history_path=HISTORY_CSV,
        title=f"Loss - {MODEL_LABEL}",
    )
    loss_graphics.show()
    loss_graphics.write_image(
        os.path.join(RUN_REPORT_DIR, f"loss_{RUN_TAG_DOT}.png")
    )

    # --- Test data -----------------------------------------------------------
    # Rebuilt per run because the batch size differs between subversions.
    # shuffle=False keeps predictions aligned with test_images.labels.
    test_images = datagen.flow_from_dataframe(
        dataframe=test_dataset,
        x_col="file_name",
        y_col="binary_label",
        target_size=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
        class_mode="binary",
        shuffle=False,
        seed=SEED,
    )

    # --- Model ---------------------------------------------------------------
    # Several models are loaded in sequence; reset Keras' global state first so
    # clutter from the previous subversion does not pile up in memory.
    tf.keras.backend.clear_session()
    model = models.load_model(MODEL)

    # --- Scalar test metrics ---------------------------------------------------
    results = model.evaluate(test_images)
    evaluation_results = dict(zip(metric_names, results))

    evaluation = pd.DataFrame([evaluation_results])
    evaluation.to_csv(EVALUATION_CSV, sep=";", index=False)

    # --- ROC curve -------------------------------------------------------------
    y_pred_prob = model.predict(test_images)
    y_true = test_images.labels

    fpr, tpr, thresholds = roc_curve(y_true, y_pred_prob)
    roc_auc = auc(fpr, tpr)

    roc_curve_graphics = graphics.create_roc_curve_chart(
        fpr=fpr,
        tpr=tpr,
        roc_auc=roc_auc,
        model_name=MODEL_LABEL,
    )
    roc_curve_graphics.show()

    ROC_PATH = os.path.join(RUN_REPORT_DIR, f"roc_curve_{RUN_TAG_UNDERSCORE}.png")
    roc_curve_graphics.write_image(ROC_PATH)

    # roc_auc is a scalar and is broadcast to every row of the table.
    results_df = pd.DataFrame(
        {"fpr": fpr, "tpr": tpr, "thresholds": thresholds, "roc_auc": roc_auc}
    )
    results_df.to_csv(
        os.path.join(RUN_LOGS_DIR, f"roc_values_{RUN_TAG_UNDERSCORE}.csv"),
        sep=";",
        index=False,
    )

    # --- Confusion matrix ------------------------------------------------------
    y_pred_class = (y_pred_prob > DECISION_THRESHOLD).astype(int)
    conf_matrix = confusion_matrix(y_true, y_pred_class)
    conf_matrix_text = [[str(value) for value in row] for row in conf_matrix]

    confusion_matrix_graphics = graphics.create_confusion_matrix_chart(
        conf_matrix=conf_matrix,
        conf_matrix_text=conf_matrix_text,
        model_name=MODEL_LABEL,
    )
    confusion_matrix_graphics.show()
    confusion_matrix_graphics.write_image(
        os.path.join(RUN_REPORT_DIR, f"confusion_matrix_{RUN_TAG_DOT}.png")
    )