In [None]:
!pip3 install keras keras-cv tensorflow tensorflow-metal matplotlib --upgrade

In [None]:
import os

# Silence TensorFlow's native (C++) logging. This has to be set before
# TensorFlow is imported for the first time -- including indirectly through
# keras / keras_cv -- otherwise the setting is not picked up.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import keras
import keras_cv
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

from keras import layers
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras import layers, models, utils
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# Notebook-wide settings.
IMG_SIZE = 256    # side length (pixels) images are resized to inside the model
BATCH_SIZE = 32   # batch size used when loading the train/validation datasets
THRESHOLD = 0.5   # cut-off applied to model outputs to decide class 1 vs class 0

In [None]:
random_seed = 22  # Must be the same for the training and validation subsets
validation_split = 0.5
data_path = '../images'

# TODO: build a training dataset without ambiguous images and a test dataset with all of them

# Arguments shared by both halves of the split; only `subset` differs.
split_options = dict(
    directory=data_path,
    batch_size=BATCH_SIZE,
    validation_split=validation_split,
    seed=random_seed,
)

ds_train = utils.image_dataset_from_directory(subset='training', **split_options)
ds_test = utils.image_dataset_from_directory(subset='validation', **split_options)

# Cache both datasets and prefetch; only the training data is additionally
# shuffled (buffer of 1000 batches).
autotune = tf.data.AUTOTUNE
ds_train = ds_train.cache().shuffle(1000).prefetch(buffer_size=autotune)
ds_test = ds_test.cache().prefetch(buffer_size=autotune)

In [None]:
# Preview up to 25 images from one training batch, titled with their label.
iterator = iter(ds_train)
images, labels = next(iterator)

# A batch may contain fewer than 25 images (smaller BATCH_SIZE or a tiny
# dataset), so never index past what the batch actually holds.
n_preview = min(25, int(images.shape[0]))

plt.figure(figsize=(10, 10))
for i in range(n_preview):
    plt.subplot(5, 5, i + 1)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    # Cast to int so imshow interprets the values as 0-255 pixel intensities.
    plt.imshow(images[i].numpy().astype(int))
    plt.title('1 - Valid' if labels[i] == 1 else '0 - Invalid')
plt.show()

In [None]:
def build_model():
    """Build and compile the binary image classifier (0 = Invalid, 1 = Valid).

    Pipeline, in order:
      1. preprocessing: resize to IMG_SIZE x IMG_SIZE, convert to a single
         grayscale channel, scale pixel values by 1/255;
      2. augmentation: random horizontal/vertical flip and random zoom;
      3. batch normalization;
      4. three Conv2D + MaxPooling2D stages (16, 32, 64 filters);
      5. dropout, flatten, Dense(128, relu) and a single sigmoid unit.

    The final unit uses a sigmoid so the model returns the probability of
    class 1. That puts the output on the same scale as THRESHOLD, which is a
    probability cut-off used by the BinaryAccuracy metric below and by code
    that thresholds the result of ``predict``.

    Returns:
        The compiled ``Sequential`` model (SGD optimizer, binary
        cross-entropy loss, binary accuracy metric).
    """
    model = models.Sequential()

    # No per-layer `input_shape` is declared: after Grayscale the tensors carry
    # one channel, so a hard-coded 3-channel shape on later layers would not
    # describe the data they actually receive. Shapes are inferred on first call.
    data_preprocessing = keras.Sequential([
        keras_cv.layers.Resizing(IMG_SIZE, IMG_SIZE),
        keras_cv.layers.Grayscale(output_channels=1),
        layers.Rescaling(1./255),
    ])

    data_augmentation = keras.Sequential([
        layers.RandomFlip("horizontal_and_vertical"),
        layers.RandomZoom(0.1),
    ])

    batch_normalization = keras.Sequential([
        keras.layers.BatchNormalization(),
    ])

    model.add(data_preprocessing)
    model.add(data_augmentation)
    model.add(batch_normalization)

    # Convolutional feature extractor: filter count doubles at every stage.
    for filters in (16, 32, 64):
        model.add(layers.Conv2D(filters, (3, 3), padding='same', activation='relu'))
        model.add(layers.MaxPooling2D((2, 2)))

    model.add(layers.Dropout(0.2))
    model.add(layers.Flatten())

    model.add(layers.Dense(128, activation='relu'))
    # Sigmoid output = probability of class 1 (see docstring).
    model.add(layers.Dense(1, activation='sigmoid'))

    optimizer = tf.keras.optimizers.SGD(learning_rate=1e-3)

    # The loss receives probabilities, hence from_logits=False; the metric
    # thresholds those same probabilities at THRESHOLD.
    model.compile(optimizer=optimizer,
                  loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
                  metrics=[keras.metrics.BinaryAccuracy(threshold=THRESHOLD)])

    return model

In [None]:
# Gather every training label with one pass over the batched dataset.
label_batches = [batch_labels for _, batch_labels in ds_train]
train_labels = np.concatenate(label_batches)

# "balanced" weights from scikit-learn, re-keyed by position (0, 1, ...) in
# the sorted array of classes found in the training labels.
present_classes = np.unique(train_labels)
balanced_weights = compute_class_weight(class_weight="balanced", classes=present_classes, y=train_labels)
class_weights = {position: weight for position, weight in enumerate(balanced_weights)}
print("Class weights:", class_weights)

In [None]:
epochs = 200

model = build_model()

# Save the full model (not only the weights) whenever validation accuracy improves.
model_checkpoint = ModelCheckpoint(
    filepath='best_model.keras',
    monitor='val_binary_accuracy',
    mode='max',
    save_best_only=True,
    save_weights_only=False,
    verbose=1,
)

# NOTE(review): this callback is created but is not in the list passed to
# fit() below, so training always runs for all `epochs` -- confirm intended.
early_stopping = EarlyStopping(monitor='val_loss', mode='min', patience=25, verbose=1)

active_callbacks = [model_checkpoint]

history = model.fit(
    ds_train,
    validation_data=ds_test,
    epochs=epochs,
    callbacks=active_callbacks,
    # class_weight=class_weights,  (currently disabled)
)

In [None]:
# Reload the model file that the ModelCheckpoint callback wrote during training
# ('best_model.keras', saved on the best monitored validation accuracy).
best_model = models.load_model('best_model.keras')

In [None]:
# Score the reloaded best model on ds_test and report it; evaluate() runs
# silently (verbose=0), so without the print the result would never be shown.
test_loss, test_acc = best_model.evaluate(ds_test, verbose=0)
print(f"Best model on ds_test - loss: {test_loss:.4f}, accuracy: {test_acc:.4f}")

# Per-epoch curves recorded by model.fit().
acc = history.history['binary_accuracy']
val_acc = history.history['val_binary_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

epochs_range = range(1, len(history.epoch) + 1)

# One panel per metric: (name, training curve, validation curve, legend position).
panels = [
    ('Accuracy', acc, val_acc, 'lower right'),
    ('Loss', loss, val_loss, 'upper right'),
]

plt.figure(figsize=(10, 4))
for position, (metric_name, train_curve, val_curve, legend_loc) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(epochs_range, train_curve, label=f'Training {metric_name}')
    plt.plot(epochs_range, val_curve, label=f'Validation {metric_name}')
    plt.xlabel('Epoch')
    plt.ylabel(metric_name)
    plt.legend(loc=legend_loc)
    plt.title(f'{metric_name} Plot')
plt.show()

In [None]:
classes = ['Invalid', 'Valid']

# NOTE(review): THRESHOLD is a probability cut-off, so this comparison assumes
# predict() returns sigmoid probabilities rather than raw logits -- confirm
# against build_model.
predictions = best_model.predict(ds_test)
pred_labels = np.where(predictions > THRESHOLD, 1, 0)
# Labels are read in a second pass over ds_test; this relies on the dataset
# yielding the same order on both passes.
real_labels = np.concatenate([labels.numpy() for _, labels in ds_test])

# Fix the matrix at len(classes) x len(classes). Without num_classes the size is
# inferred from the largest value seen, which yields a smaller matrix (and an
# indexing error in the annotation loop below) if class 1 never appears.
cm = tf.math.confusion_matrix(
    labels=real_labels,
    predictions=pred_labels.ravel(),
    num_classes=len(classes),
)
cm_counts = cm.numpy()

plt.figure(figsize=(8, 6))
plt.imshow(cm_counts, cmap=plt.cm.Greens)
plt.title('Confusion Matrix', fontsize=16)
plt.colorbar()

plt.xticks(range(len(classes)), classes)
plt.yticks(range(len(classes)), classes)

# Rows are real labels, columns are predicted labels (class 1 = 'Valid' is positive).
text_labels = ['TN', 'FP', 'FN', 'TP']

for i in range(len(classes)):
    for j in range(len(classes)):
        plt.text(j, i, f"{text_labels[i*len(classes)+j]}: {cm_counts[i, j]}", ha='center', va='center', fontsize=12)

plt.ylabel('Real Label', fontsize=14)
plt.xlabel('Predicted Label', fontsize=14)
plt.show()

In [None]:
target_names = ['Invalid', 'Valid']

# Positions (in dataset order) of the two kinds of mistakes.
flat_predictions = pred_labels.flatten()
false_positives = np.flatnonzero((flat_predictions == 1) & (real_labels == 0))
false_negatives = np.flatnonzero((flat_predictions == 0) & (real_labels == 1))

# One 5x5 gallery per error type: false positives first, then false negatives.
galleries = (
    ("Falsos Positivos", false_positives),
    ("Falsos Negativos", false_negatives),
)

for gallery_title, wrong_indices in galleries:
    plt.figure(figsize=(10, 10))
    plt.suptitle(gallery_title)
    for slot, idx in enumerate(wrong_indices[:25], start=1):
        # Fetch the idx-th individual sample from the (unbatched) test dataset.
        sample = ds_test.unbatch().skip(idx).take(1)
        image, label = next(sample.as_numpy_iterator())
        plt.subplot(5, 5, slot)
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
        plt.imshow(image.astype(int))
        plt.title(f'{label} - {target_names[label]}')

    plt.show()

In [None]:
classes = ['Invalid', 'Valid']

# shuffle=False keeps a fixed file order, so the predictions (first pass) and
# the labels (second pass) below are guaranteed to line up.
all_images = tf.keras.utils.image_dataset_from_directory(directory=data_path, shuffle=False)
all_images = all_images.cache().prefetch(buffer_size=tf.data.AUTOTUNE)

# NOTE(review): THRESHOLD is a probability cut-off, so this comparison assumes
# predict() returns sigmoid probabilities rather than raw logits -- confirm
# against build_model.
predictions = best_model.predict(all_images)
pred_labels = np.where(predictions > THRESHOLD, 1, 0)
real_labels = np.concatenate([labels.numpy() for _, labels in all_images])

# Fix the matrix at len(classes) x len(classes). Without num_classes the size is
# inferred from the largest value seen, which yields a smaller matrix (and an
# indexing error in the annotation loop below) if class 1 never appears.
cm = tf.math.confusion_matrix(
    labels=real_labels,
    predictions=pred_labels.ravel(),
    num_classes=len(classes),
)
cm_counts = cm.numpy()

plt.figure(figsize=(8, 6))
plt.imshow(cm_counts, cmap=plt.cm.Greens)
plt.title('Confusion Matrix', fontsize=16)
plt.colorbar()

plt.xticks(range(len(classes)), classes)
plt.yticks(range(len(classes)), classes)

# Rows are real labels, columns are predicted labels (class 1 = 'Valid' is positive).
text_labels = ['TN', 'FP', 'FN', 'TP']

for i in range(len(classes)):
    for j in range(len(classes)):
        plt.text(j, i, f"{text_labels[i*len(classes)+j]}: {cm_counts[i, j]}", ha='center', va='center', fontsize=12)

plt.ylabel('Real Label', fontsize=14)
plt.xlabel('Predicted Label', fontsize=14)
plt.show()

In [None]:
# `cm` is assumed to be the 2x2 confusion matrix computed earlier
# (rows = real label, columns = predicted label). Each rate is the share of a
# row that falls in column 0.
invalid_row_total = cm[0, 0] + cm[0, 1]
valid_row_total = cm[1, 0] + cm[1, 1]
invalid_rejected_rate = cm[0, 0] / invalid_row_total
valid_rejected_rate = cm[1, 0] / valid_row_total

rates = [invalid_rejected_rate, valid_rejected_rate]
classes = ['Invalid images', 'Valid images']
labels = ['Not rejected', 'Rejected']
colors = ['green', 'red']

fig, axs = plt.subplots(1, 2, figsize=(12, 6))

# Style options shared by both pie charts.
pie_style = {'autopct': '%.1f%%', 'startangle': 90, 'textprops': {'fontsize': 12}}

for i, (ax, rate, panel_title) in enumerate(zip(axs, rates, classes)):
    # The colour pair is swapped on the first chart (i == 0): there the
    # 'Rejected' slice is drawn green and 'Not rejected' red; the second
    # chart uses the opposite assignment.
    slice_colors = [colors[1 - i], colors[i]]
    ax.pie([1 - rate, rate], labels=labels, colors=slice_colors, **pie_style)
    ax.set_title(panel_title, fontsize=14)

plt.tight_layout()
plt.show()

In [None]:
target_names = ['Invalid', 'Valid']

# Positions (in dataset order) of the two kinds of mistakes.
flat_predictions = pred_labels.flatten()
false_positives = np.flatnonzero((flat_predictions == 1) & (real_labels == 0))
false_negatives = np.flatnonzero((flat_predictions == 0) & (real_labels == 1))

# One 5x5 gallery per error type: false positives first, then false negatives.
galleries = (
    ("Falsos Positivos", false_positives),
    ("Falsos Negativos", false_negatives),
)

for gallery_title, wrong_indices in galleries:
    plt.figure(figsize=(10, 10))
    plt.suptitle(gallery_title)
    for slot, idx in enumerate(wrong_indices[:25], start=1):
        # Fetch the idx-th individual sample from the (unbatched) full dataset.
        sample = all_images.unbatch().skip(idx).take(1)
        image, label = next(sample.as_numpy_iterator())
        plt.subplot(5, 5, slot)
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
        plt.imshow(image.astype(int))
        plt.title(f'{label} - {target_names[label]}')

    plt.show()

In [None]:
# Download one image, run it through the best model and report the verdict.
test_image = "https://media.adeo.com/marketplace/LMES/83431706/1834868.jpeg"
test_image_path = tf.keras.utils.get_file('Test Image 8', origin=test_image)

img = tf.keras.utils.load_img(
    test_image_path, target_size=(IMG_SIZE, IMG_SIZE)
)

# Turn the image into a batch of one: shape (1, IMG_SIZE, IMG_SIZE, channels).
img_array = tf.keras.utils.img_to_array(img)
img_array = tf.expand_dims(img_array, axis=0)

predictions = best_model.predict(img_array)

# Assumes the model output is the sigmoid probability of class 1, consistent
# with comparing it against THRESHOLD; if the model emits raw logits, pass the
# value through a sigmoid first -- TODO confirm against build_model.
valid_probability = float(predictions[0][0])
predicted_class = 1 if valid_probability > THRESHOLD else 0

# Confidence is the probability of the class that was actually predicted:
# p for class 1, (1 - p) for class 0.
predicted_probability = valid_probability if predicted_class == 1 else 1.0 - valid_probability
confidence = 100 * predicted_probability

# `classes` is whatever list an earlier cell last assigned to that name.
print(
    "This image most likely belongs to {}. Confidence {:.2f}"
    .format(classes[predicted_class], confidence)
)