In [1]:
import cv2 as cv
import albumentations as A
import os
import sys
import datetime
import io

import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras.layers import Conv2D, MaxPool2D, Dense, Flatten, Input, BatchNormalization, Layer, Dropout, Resizing, Rescaling, RandomFlip, RandomRotation
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model
from tensorflow.keras.metrics import BinaryAccuracy, FalsePositives, FalseNegatives, TruePositives, Accuracy,TrueNegatives, AUC, Precision, Recall
from tensorflow.keras.callbacks import Callback, CSVLogger, EarlyStopping, LearningRateScheduler, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.regularizers import L2, L1
import tensorflow_probability as tfp
from tensorboard.plugins.hparams import api as hp

import math
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
from sklearn.metrics import confusion_matrix, roc_curve

# !pip install -U wandb
import wandb
# !wandb login

  check_for_updates()
2024-11-02 21:18:24.192440: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1730593104.471144    1146 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1730593104.550188    1146 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-02 21:18:25.264727: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [11]:
# Side length, in pixels, of the square 3-channel images the network consumes.
IMG_SIZE = 224

# Single source of truth for the architecture and training hyperparameters.
# Key order is kept stable because the mapping is also logged as metadata.
CONFIG = dict(
    # --- input ---
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
    # --- convolutional stack ---
    filters_1=6,
    filters_2=16,
    kernel_size=3,
    # --- activations (hidden layers / output layer) ---
    activation_1="relu",
    activation_2="sigmoid",
    # --- regularization ---
    dropout=0.01,
    regularization_l2=0.1,
    # --- compile settings ---
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["binary_accuracy"],
    # --- pooling / strides ---
    pool_size=2,
    strides_1=1,
    strides_2=2,
    # --- dense head ---
    dense_1=32,
    dense_2=32,
    dense_out=1,
    # --- training loop ---
    learning_rate=0.01,
    batch_size=32,
    epochs=3,  # drop to 1 for a quick run
)

# LeNet-style classifier assembled entirely from CONFIG:
#   2 x (Conv2D -> BatchNormalization -> MaxPool2D), then
#   2 x (Dense -> BatchNormalization), then a single output Dense layer.
# Every conv/dense kernel except the output layer carries an L2 penalty of
# CONFIG["regularization_l2"].
lenet_model = tf.keras.Sequential(
    [
        Input(shape=CONFIG["input_shape"]),
        # --- feature extractor, stage 1 ---
        Conv2D(
            filters=CONFIG["filters_1"],
            kernel_size=CONFIG["kernel_size"],
            strides=CONFIG["strides_1"],
            padding="valid",
            activation=CONFIG["activation_1"],
            kernel_regularizer=L2(CONFIG["regularization_l2"]),
        ),
        BatchNormalization(),
        MaxPool2D(pool_size=CONFIG["pool_size"], strides=CONFIG["strides_2"]),
        Dropout(rate=CONFIG["dropout"]),
        # --- feature extractor, stage 2 ---
        Conv2D(
            filters=CONFIG["filters_2"],
            kernel_size=CONFIG["kernel_size"],
            strides=CONFIG["strides_1"],
            padding="valid",
            activation=CONFIG["activation_1"],
            kernel_regularizer=L2(CONFIG["regularization_l2"]),
        ),
        BatchNormalization(),
        MaxPool2D(pool_size=CONFIG["pool_size"], strides=CONFIG["strides_2"]),
        # --- classifier head ---
        Flatten(),
        Dense(
            CONFIG["dense_1"],
            activation=CONFIG["activation_1"],
            kernel_regularizer=L2(CONFIG["regularization_l2"]),
        ),
        BatchNormalization(),
        Dropout(rate=CONFIG["dropout"]),
        Dense(
            CONFIG["dense_2"],
            activation=CONFIG["activation_1"],
            kernel_regularizer=L2(CONFIG["regularization_l2"]),
        ),
        BatchNormalization(),
        # Output width is read from CONFIG (previously a hard-coded 1) so the
        # built model always matches the configuration recorded alongside it.
        Dense(CONFIG["dense_out"], activation=CONFIG["activation_2"]),
    ]
)

# untrained_model

In [12]:
# Serialize the freshly built (not yet trained) network to disk so it can be
# versioned as a W&B artifact.
model_file = "untrained_lenet_model.keras"
lenet_model.save(model_file)

with wandb.init(project="Malaria-Detection", entity="albertalvin8080-academic") as run:
    # CONFIG is attached as metadata so the hyperparameters that produced this
    # architecture are stored together with the model file.
    artifact = wandb.Artifact(name="untrained_model", type="lenet_model", metadata=CONFIG)
    artifact.add_file(model_file)
    # Log through the run opened above instead of relying on artifact.save()
    # to discover the active run implicitly.
    run.log_artifact(artifact)

VBox(children=(Label(value='0.443 MB of 5.762 MB uploaded\r'), FloatProgress(value=0.07681391873576537, max=1.…

# trained_sequential_model

In [14]:
# Resolve both input artifacts inside one run (which records them as inputs
# of that run), then fetch their files to local disk.
with wandb.init(
    project="Malaria-Detection",
    entity="albertalvin8080-academic",
) as run:
    # The serialized, untrained LeNet model.
    untrained_model_artifact = run.use_artifact(
        "albertalvin8080-academic/Malaria-Detection/untrained_model:v0",
        type="lenet_model",
    )
    # The preprocessed / augmented training samples.
    augmented_dataset_artifact = run.use_artifact(
        "albertalvin8080-academic/Malaria-Detection/augmented_dataset:v0",
        type="preprocessed_dataset",
    )

    # Fetch the files of each artifact; the returned paths are not needed here.
    untrained_model_artifact.download()
    augmented_dataset_artifact.download()

[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m: Downloading large artifact augmented_dataset:v0, 294.62MB. 960 files... 
[34m[1mwandb[0m:   960 of 960 files downloaded.  
Done. 0:0:7.6


In [None]:
def npz_generator(file_paths):
    """Lazily yield ``(image, label)`` pairs, one ``.npz`` archive at a time.

    Loading a single file per iteration keeps the memory footprint small
    compared with reading the whole dataset up front.

    Args:
        file_paths: Iterable of paths to ``.npz`` archives, each containing
            the arrays ``"image"`` and ``"label"``.

    Yields:
        Tuple ``(image, label)`` with the two arrays read from each archive,
        in the order the paths were given.

    Raises:
        KeyError: If an archive lacks the ``"image"`` or ``"label"`` entry.
    """
    for file_path in file_paths:
        # np.load on an .npz returns an NpzFile that holds an open file
        # handle; the context manager closes it once both arrays are read,
        # so handles do not pile up across samples and epochs.
        # SECURITY: allow_pickle=True can execute arbitrary code when loading
        # a crafted file. Only use this on trusted archives.
        with np.load(file_path, allow_pickle=True) as data:
            image = data["image"]
            label = data["label"]
        yield image, label


# Train the untrained LeNet on the augmented dataset inside a single W&B run
# and log the trained model as a new artifact.
with wandb.init(project="Malaria-Detection", entity="albertalvin8080-academic") as run:
    untrained_model_artifact = run.use_artifact(
        "albertalvin8080-academic/Malaria-Detection/untrained_model:v0",
        type="lenet_model",
    )
    augmented_dataset_artifact = run.use_artifact(
        "albertalvin8080-academic/Malaria-Detection/augmented_dataset:v0",
        type="preprocessed_dataset",
    )

    # Build paths from what download() returns instead of a hard-coded
    # "artifacts/<name>:v0" string: the cell no longer depends on another
    # cell having fetched the files, nor on how wandb names the directory.
    uma_file = os.path.join(
        untrained_model_artifact.download(), "untrained_lenet_model.keras"
    )
    augd_dir = augmented_dataset_artifact.download()
    # os.listdir order is arbitrary; sort for a reproducible file list.
    augd_files = sorted(
        os.path.join(augd_dir, file_name) for file_name in os.listdir(augd_dir)
    )
    if not augd_files:
        raise FileNotFoundError(f"No dataset files found in {augd_dir!r}")

    def _shuffled_samples():
        """Return a sample generator over a fresh random permutation of the files."""
        # from_generator calls this again every time the dataset is iterated
        # (i.e. on every pass produced by .repeat()), so each epoch sees a
        # different global order. Shuffling paths costs no extra memory,
        # unlike enlarging the element-level shuffle buffer below.
        order = np.random.permutation(len(augd_files))
        return npz_generator([augd_files[i] for i in order])

    dataset = tf.data.Dataset.from_generator(
        _shuffled_samples,
        output_signature=(
            tf.TensorSpec(shape=CONFIG["input_shape"], dtype=tf.float32),
            tf.TensorSpec(shape=(), dtype=tf.int64),
        ),
    )

    # NOTE (original author's observation): training came to a halt when the
    # generator did not supply as many items as the batches being requested.
    # Either make the generator yield in multiples of the batch size, or, as
    # done here, call .repeat() on the dataset and pass a manually computed
    # steps_per_epoch to .fit().
    dataset = (
        dataset.shuffle(buffer_size=8, reshuffle_each_iteration=True)
        .batch(CONFIG["batch_size"])
        .repeat()  # Repeat the dataset indefinitely.
        .prefetch(tf.data.AUTOTUNE)
    )
    # One pass over the files yields ceil(n / batch_size) batches because the
    # final batch may be partial. Rounding up keeps epochs aligned with passes
    # and avoids steps_per_epoch == 0 when there are fewer files than one batch.
    steps_per_epoch = math.ceil(len(augd_files) / CONFIG["batch_size"])

    model = tf.keras.models.load_model(uma_file)
    model.compile(
        optimizer=Adam(learning_rate=CONFIG["learning_rate"]),
        loss=CONFIG["loss"],
        metrics=CONFIG["metrics"],
    )

    history = model.fit(
        dataset, epochs=CONFIG["epochs"], steps_per_epoch=steps_per_epoch, verbose=2
    )

    model_file = "trained_lenet_model.keras"
    model.save(model_file)

    artifact = wandb.Artifact(name="trained_sequential_model", type="lenet_model")
    artifact.add_file(model_file)
    # Log through the active run so the trained model is recorded as an
    # output of this training run.
    run.log_artifact(artifact)

Epoch 1/3
30/30 - 27s - 913ms/step - binary_accuracy: 0.5615 - loss: 18.1231
Epoch 2/3
30/30 - 15s - 503ms/step - binary_accuracy: 0.5906 - loss: 6.5527
Epoch 3/3
30/30 - 15s - 509ms/step - binary_accuracy: 0.6219 - loss: 2.8692


VBox(children=(Label(value='0.146 MB of 17.186 MB uploaded\r'), FloatProgress(value=0.008492986198585281, max=…