In [25]:
import os
import pandas as pd
import plotly.graph_objects as go
import tensorflow as tf
import numpy as np
import zipfile
from sklearn.metrics import confusion_matrix, auc, roc_curve, classification_report
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [26]:
!pip install kaleido



In [27]:
from google.colab import drive
drive.mount('/content/drive')

DRIVE_PATH = os.path.abspath("./drive/MyDrive/MegaClassifier")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [28]:
MODEL_NAME = "MegaClassifier_a"
VERSION = "v2"

In [29]:
DATASETS = {
    "MegaClassifier_a": "onlyDetectionsForTrain",
    "MegaClassifier_b": "emptyOriginalAnimalDetection",
    "MegaClassifier_c": "emptyNonEmptyDataset",
}

VERSIONS = {
    "v1": [
        "v1.0",
        "v1.1",
        "v1.2",
        "v1.3",
    ],
    "v2": [
        "v2.0",
        "v2.1",
        "v2.2"
    ],
    "v3": [
        "v3.0",
        "v3.1",
    ],
    "v4": [
        "v4.0",
        "v4.1",
        "v4.2",
    ]
}

EVAL = {
    "v1": {
        "batch_size": [
            16,
            32,
            64,
            128
        ],
    },

    "v2": {
        "optimizer": [
            "adam",
            "rmsprop",
            "sgd"
        ],
    },

    "v3": {
        "loss_function": [
            "binarycrossentropy",
            "binrayfocalcrossentropy",
            "weighted_binraycrossentropy",
        ],
    },
}

In [30]:
def create_accucary(history: pd.DataFrame, MODEL_NAME: str, SUBVERSION: str) -> go.Figure():
  graphic = go.Figure()

  graphic.add_trace(
      go.Scatter(
          x=list(range(0, len(history['accuracy']) +1)),
          y=history["accuracy"],
          line=dict(width=2),
          name="Train"
      )
  )

  graphic.add_trace(
      go.Scatter(
          x=list(range(0, len(history['val_accuracy']) +1)),
          y=history["val_accuracy"],
          line=dict(width=2),
          name="Validation"
      )
  )

  graphic.update_layout(
      title=f"{MODEL_NAME} {SUBVERSION}",
      xaxis_title="Epochs",
      yaxis_title="Accuracy",
      template="seaborn",
      width=700,
      height=500,
  )

  return graphic

def create_loss(history: pd.DataFrame, MODEL_NAME: str, SUBVERSION: str) -> go.Figure():
  graphic = go.Figure()

  graphic.add_trace(
      go.Scatter(
          x=list(range(0, len(history['loss']) +1)),
          y=history["loss"],
          line=dict(width=2),
          name="Train"
      )
  )

  graphic.add_trace(
      go.Scatter(
          x=list(range(0, len(history['val_loss']) +1)),
          y=history["val_loss"],
          line=dict(width=2),
          name="Validation"
      )
  )

  graphic.update_layout(
      title=f"{MODEL_NAME} {SUBVERSION}",
      xaxis_title="Epochs",
      yaxis_title="Loss",
      template="seaborn",
      width=700,
      height=500,
  )

  return graphic

def create_precision(history: pd.DataFrame, MODEL_NAME: str, SUBVERSION: str) -> go.Figure():
  graphic = go.Figure()

  graphic.add_trace(
      go.Scatter(
          x=list(range(0, len(history['precision']) +1)),
          y=history["precision"],
          line=dict(width=2),
          name="Train"
      )
  )

  graphic.add_trace(
      go.Scatter(
          x=list(range(0, len(history['val_precision']) +1)),
          y=history["val_precision"],
          line=dict(width=2),
          name="Validation"
      )
  )

  graphic.update_layout(
      title=f"{MODEL_NAME} {SUBVERSION}",
      xaxis_title="Epochs",
      yaxis_title="Precision",
      template="seaborn",
      width=700,
      height=500,
  )

  return graphic

def create_recall(history: pd.DataFrame, MODEL_NAME: str, SUBVERSION: str) -> go.Figure():
  graphic = go.Figure()

  graphic.add_trace(
      go.Scatter(
          x=list(range(0, len(history['recall']) +1)),
          y=history["recall"],
          line=dict(width=2),
          name="Train"
      )
  )

  graphic.add_trace(
      go.Scatter(
          x=list(range(0, len(history['val_recall']) +1)),
          y=history["val_recall"],
          line=dict(width=2),
          name="Validation"
      )
  )

  graphic.update_layout(
      title=f"{MODEL_NAME} {SUBVERSION}",
      xaxis_title="Epochs",
      yaxis_title="Recall",
      template="seaborn",
      width=700,
      height=500,
  )

  return graphic

def create_f1_score(history: pd.DataFrame, MODEL_NAME: str, SUBVERSION: str) -> go.Figure():
  graphic = go.Figure()

  graphic.add_trace(
      go.Scatter(
          x=list(range(0, len(history['f1_score']) +1)),
          y=history["f1_score"],
          line=dict(width=2),
          name="Train"
      )
  )

  graphic.add_trace(
      go.Scatter(
          x=list(range(0, len(history['val_f1_score']) +1)),
          y=history["val_f1_score"],
          line=dict(width=2),
          name="Validation"
      )
  )

  graphic.update_layout(
      title=f"{MODEL_NAME} {SUBVERSION}",
      xaxis_title="Epochs",
      yaxis_title="F1 Score",
      template="seaborn",
      width=700,
      height=500,
  )

  return graphic

def create_auc(history: pd.DataFrame, MODEL_NAME: str, SUBVERSION: str) -> go.Figure():
  graphic = go.Figure()

  graphic.add_trace(
      go.Scatter(
          x=list(range(0, len(history['auc']) +1)),
          y=history["auc"],
          line=dict(width=2),
          name="Train"
      )
  )

  graphic.add_trace(
      go.Scatter(
          x=list(range(0, len(history['val_auc']) +1)),
          y=history["val_auc"],
          line=dict(width=2),
          name="Validation"
      )
  )

  graphic.update_layout(
      title=f"{MODEL_NAME} {SUBVERSION}",
      xaxis_title="Epochs",
      yaxis_title="AUC",
      template="seaborn",
      width=700,
      height=500,
  )

  return graphic

def create_confusion_matrix(conf_matrix, MODEL_NAME, SUBVERSION):
  fig = go.Figure(
    data=go.Heatmap(
        z=conf_matrix,
        x=["Animal", "Empty"],
        y=["Empty", "Animal"],
        text=conf_matrix,
        texttemplate="%{text}",
        colorscale="Blues",
        showscale=True,
    )
  )


  fig.update_layout(
      title=f"Confusion Matrix {MODEL_NAME} {SUBVERSION}",
      xaxis_title="Predicted value",
      yaxis_title="Actual value",
      xaxis=dict(tickmode="array", tickvals=[0, 1], ticktext=["Animal", "Empty"]),
      yaxis=dict(tickmode="array", tickvals=[0, 1], ticktext=["Empty", "Animal"]),
      template="seaborn",
      width=600,
      height=600,
  )

  return fig

def create_roc_curve(fpr, tpr, roc_auc, MODEL_NAME, SUBVERSION):
  fig = go.Figure()

  fig.add_trace(
      go.Scatter(
          x=fpr, y=tpr, mode="lines", name=f"(AUC = {roc_auc:.4f})"
      )
  )

  fig.add_trace(
      go.Scatter(
          x=[0, 1],
          y=[0, 1],
          mode="lines",
          name="(AUC = 0.5000)",
          line=dict(dash="dash"),
      )
  )

  fig.update_layout(
      title=f"ROC Curve {MODEL_NAME} {SUBVERSION}",
      xaxis_title="False Positive Rate",
      yaxis_title="True Positive Rate",
      legend_title="Curves",
      template="seaborn",
      width=700,
      height=500,
      xaxis=dict(
          tickmode="linear",
          tick0=0,
          dtick=0.1,
      ),
  )

  return fig

In [31]:
DATASET_PATH_ZIP = os.path.join(DRIVE_PATH, f"data/processed/{DATASETS[MODEL_NAME]}.zip")
with zipfile.ZipFile(DATASET_PATH_ZIP, 'r') as zip_ref:
      zip_ref.extractall("./data/processed")
DATASET_DIR = os.path.abspath(f"data/processed/{DATASETS[MODEL_NAME]}")

IMAGE_SIZE = (456, 456)
IMAGE_SHAPE = IMAGE_SIZE + (3,)
SEED = 42

datagen = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.efficientnet.preprocess_input,
)
test_images = datagen.flow_from_directory(
  directory=f"{DATASET_DIR}/test",
  classes=['vacia', 'animal'],
  target_size=IMAGE_SIZE,
  class_mode="binary",
  shuffle=False,
  seed=SEED,
)

positive_count = sum(1 for label in test_images.labels if label == 1)
negative_count = sum(1 for label in test_images.labels if label == 0)
total_count = positive_count + negative_count

print(f"Animals: {positive_count}")
print(f"Empty: {negative_count}")
print(f"Total: {total_count}")

Found 4286 images belonging to 2 classes.
Animals: 2836
Empty: 1450
Total: 4286


In [32]:
test_table = pd.DataFrame(columns=['version', 'parameter', 'accuracy', 'loss', 'precision', 'recall', 'f1_score', 'auc', 'fp_rate', 'fn_rate'])

for SUBVERSION in VERSIONS[VERSION]:

  LOGS_PATH = os.path.join(DRIVE_PATH, f"logs/{MODEL_NAME}/{VERSION}/{SUBVERSION}")
  LOGS_VERSION_PATH = os.path.join(DRIVE_PATH, f"logs/{MODEL_NAME}/{VERSION}")
  MODELS_PATH = os.path.join(DRIVE_PATH, f"models/{MODEL_NAME}/{VERSION}/{SUBVERSION}")
  REPORTS_PATH = os.path.join(DRIVE_PATH, f"reports/2_trainning_results/{MODEL_NAME}/{VERSION}/")

  os.makedirs(REPORTS_PATH, exist_ok=True)

  history = pd.read_csv(os.path.join(LOGS_PATH, f"history_{SUBVERSION}.csv"), sep=";")

  epsilon = 1e-8
  history['f1_score'] = 2 * (
      history['precision'] * history['recall'] /
      (history['precision'] + history['recall'] + epsilon)
  )

  history['val_f1_score'] = 2 * (
      history['val_precision'] * history['val_recall'] /
      (history['val_precision'] + history['val_recall'] + epsilon)
  )

  history.to_csv(os.path.join(LOGS_PATH, f"history_{SUBVERSION}.csv"), sep=";", index=False)

  accuracy_graphics = create_accucary(history, MODEL_NAME, SUBVERSION)
  accuracy_graphics.write_image(os.path.join(REPORTS_PATH, f"accuracy_{SUBVERSION}.png"))

  loss_graphics = create_loss(history, MODEL_NAME, SUBVERSION)
  loss_graphics.write_image(os.path.join(REPORTS_PATH, f"loss_{SUBVERSION}.png"))

  precision_graphics = create_precision(history, MODEL_NAME, SUBVERSION)
  precision_graphics.write_image(os.path.join(REPORTS_PATH, f"precision_{SUBVERSION}.png"))

  recall_graphics = create_recall(history, MODEL_NAME, SUBVERSION)
  recall_graphics.write_image(os.path.join(REPORTS_PATH, f"recall_{SUBVERSION}.png"))

  f1_graphics = create_f1_score(history, MODEL_NAME, SUBVERSION)
  f1_graphics.write_image(os.path.join(REPORTS_PATH, f"f1_score_{SUBVERSION}.png"))

  auc_graphics = create_auc(history, MODEL_NAME, SUBVERSION)
  auc_graphics.write_image(os.path.join(REPORTS_PATH, f"auc_{SUBVERSION}.png"))


  SUBVERSION_INDEX = VERSIONS[VERSION].index(SUBVERSION)
  EVAL_PARAMETER = list(EVAL[VERSION].keys())[0]
  EVAL_VALUE = EVAL[VERSION][EVAL_PARAMETER][SUBVERSION_INDEX]

  model = tf.keras.models.load_model(os.path.join(MODELS_PATH, f"{MODEL_NAME}_{SUBVERSION}.keras"))

  y_pred_prob = model.predict(test_images)
  results = model.evaluate(test_images)

  y_true = test_images.labels

  y_pred_class = (y_pred_prob > 0.5).astype(int)
  conf_matrix = confusion_matrix(y_true, y_pred_class)

  TN = conf_matrix[0][0]
  FN = conf_matrix[0][1]
  FP = conf_matrix[1][0]
  TP = conf_matrix[1][1]

  accuracy = (TP + TN) / (TP + TN + FP + FN)
  precision = TP / (TP + FP)
  recall = TP / (TP + FN)
  f1_score = 2 * (precision * recall) / (precision + recall + epsilon)
  fp_rate = FP / (FP + TN) if (FP + TN) > 0 else 0
  fn_rate = FN / (FN + TP) if (FN + TP) > 0 else 0

  fpr, tpr, thresholds = roc_curve(y_true, y_pred_prob)
  auc_predict = auc(fpr, tpr)

  class_report = classification_report(y_true, y_pred_class, target_names=["vacía", "animal"],output_dict=True)
  report = pd.DataFrame(class_report).transpose()
  report.to_csv(os.path.join(LOGS_PATH, f"classification_report_{SUBVERSION}.csv"), index=True, sep=";")

  conf_matrix = np.array([[FP, TN], [TP, FN]])
  matrix = pd.DataFrame(conf_matrix, columns=["Animal", "Empty"], index=["Empty", "Animal"])
  matrix.to_csv(os.path.join(LOGS_PATH, f"confusion_matrix_{SUBVERSION}.csv"), index=True, sep=";")
  confusion_matrix_fig = create_confusion_matrix(conf_matrix, MODEL_NAME, SUBVERSION)
  confusion_matrix_fig.write_image(os.path.join(REPORTS_PATH, f"confusion_matrix_{SUBVERSION}.png"))

  roc_curve_fig = create_roc_curve(fpr, tpr, auc_predict, MODEL_NAME, SUBVERSION)
  roc_curve_fig.write_image(os.path.join(REPORTS_PATH, f"roc_curve_{SUBVERSION}.png"))

  test_row = pd.DataFrame({
      'version': [str(SUBVERSION)],
       'parameter': [EVAL_VALUE],
       'accuracy': [accuracy],
       'loss': [results[0]],
       'precision': [precision],
       'recall': [recall],
       'f1_score': [f1_score],
       'auc': [auc_predict],
       'fp_rate': [fp_rate],
       'fn_rate': [fn_rate],
  })
  test_row.to_csv(os.path.join(LOGS_PATH, f"test_{SUBVERSION}.csv"), index=False, sep=";")

  test_table = pd.concat([test_table, test_row],ignore_index=True)

test_table.to_csv(os.path.join(LOGS_VERSION_PATH, f"test_{VERSION}.csv"), index=False, sep=";")
test_table


Skipping variable loading for optimizer 'rmsprop', because it has 4 variables whereas the saved optimizer has 6 variables. 


Your `PyDataset` class should call `super().__init__(**kwargs)` in its constructor. `**kwargs` can include `workers`, `use_multiprocessing`, `max_queue_size`. Do not pass these arguments to `fit()`, as they will be ignored.



[1m134/134[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 235ms/step
[1m134/134[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 192ms/step - accuracy: 0.9452 - auc: 0.6601 - loss: 0.1416 - precision: 0.5888 - recall: 0.6391



The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.



[1m134/134[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 235ms/step
[1m134/134[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 192ms/step - accuracy: 0.9490 - auc: 0.6601 - loss: 0.1375 - precision: 0.5949 - recall: 0.6398



Skipping variable loading for optimizer 'rmsprop', because it has 4 variables whereas the saved optimizer has 2 variables. 



[1m134/134[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 233ms/step
[1m134/134[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 194ms/step - accuracy: 0.9332 - auc: 0.6565 - loss: 0.2062 - precision: 0.5798 - recall: 0.6343


Unnamed: 0,version,parameter,accuracy,loss,precision,recall,f1_score,auc,fp_rate,fn_rate
0,v2.0,adam,0.948903,0.118256,0.956982,0.965493,0.961218,0.990604,0.082712,0.034507
1,v2.1,rmsprop,0.951237,0.117617,0.956982,0.96894,0.962924,0.990673,0.082155,0.03106
2,v2.2,sgd,0.939571,0.163511,0.948166,0.960014,0.954054,0.984019,0.09899,0.039986
