In [None]:
#====================================================================================================#
#                                                                                                    #
#                                                        ██╗   ██╗   ████████╗ █████╗ ██████╗        #
#      Competición - INAR                                ██║   ██║   ╚══██╔══╝██╔══██╗██╔══██╗       #
#                                                        ██║   ██║█████╗██║   ███████║██║  ██║       #
#      created:        29/10/2025  -  23:00:15           ██║   ██║╚════╝██║   ██╔══██║██║  ██║       #
#      last change:    04/11/2025  -  09:12:54           ╚██████╔╝      ██║   ██║  ██║██████╔╝       #
#                                                         ╚═════╝       ╚═╝   ╚═╝  ╚═╝╚═════╝        #
#                                                                                                    #
#      Ismael Hernandez Clemente                         ismael.hernandez@live.u-tad.com             #
#                                                                                                    #
#      Github:                                           https://github.com/ismaelucky342            #
#                                                                                                    #
#====================================================================================================#

# Iteración 1 - CNN Básica

Primera iteración siguiendo la plantilla de Kaggle con una CNN simple de 3 capas convolucionales.

**Arquitectura**: Conv2D(32) → Conv2D(64) → Conv2D(128) → Dense(512) → Dense(2)

**Kaggle Score**: No presentado. Tras probarlo en local, llegué a la conclusión de que hacía falta añadir *data augmentation*, que ya estaba planteado y se entregará en la siguiente iteración.

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from tensorflow import data as tf_data
import keras

# Single seed for the whole notebook: it is handed to Keras' global seeding
# helper here and reused as the `seed` of the train/validation split below.
seed = 42
keras.utils.set_random_seed(seed)

# Record the Keras version in the cell output.
print("Versión de Keras:", keras.__version__)

## Carga de Datos

In [None]:
# Target resolution every image is resized to, and the number of images per batch.
image_size = (256, 256)
batch_size = 125

# Options for the directory loader. Class labels are inferred from the
# sub-directory names and emitted as one-hot ("categorical") vectors.
# subset="both" together with validation_split=0.2 makes the loader return
# a (training, validation) pair split 80/20 using the notebook seed.
loader_options = dict(
    labels="inferred",
    label_mode="categorical",
    image_size=image_size,
    batch_size=batch_size,
    validation_split=0.2,
    subset="both",
    seed=seed,
)

train_ds, val_ds = keras.utils.image_dataset_from_directory(
    "/kaggle/input/u-tad-dogs-vs-cats-2025/train/train",
    **loader_options,
)

# Report how many batches each split contains.
for split_name, split_ds in (("Training", train_ds), ("Validation", val_ds)):
    print(f"{split_name} batches: {len(split_ds)}")

## Construcción del Modelo - CNN Básica

In [None]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

# Model input: (height, width) taken from image_size plus 3 colour channels.
input_shape = image_size + (3,)

# Basic CNN: three Conv2D + MaxPooling stages (32 -> 64 -> 128 filters), then a
# 512-unit dense layer and a 2-way softmax matching the one-hot labels.
#
# The image loaders deliver raw pixel intensities in [0, 255]. The Rescaling
# layer maps them to [0, 1] before the first convolution. Keeping it inside the
# model means training, evaluation, inference and the saved "model.keras" file
# all apply the same normalisation, so callers keep feeding unscaled images.
model = Sequential([
    keras.Input(shape=input_shape),
    keras.layers.Rescaling(1.0 / 255),
    Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(filters=128, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(512, activation='relu'),
    Dense(2, activation='softmax')
])

model.summary()

## Compilación y Entrenamiento

In [None]:
%%time

model.compile(
    optimizer=keras.optimizers.RMSprop(learning_rate=0.001, momentum=0.0),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

epochs = 12

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=epochs,
    verbose=1
)

## Visualización de Curvas de Aprendizaje

In [None]:
# One row per epoch, one column per tracked metric.
logs = pd.DataFrame(history.history)

# (metric column, y-axis label, legend options) for the left and right panels.
panels = [
    ("loss", "Loss", {}),
    ("accuracy", "Accuracy", {"loc": "lower right"}),
]

fig, axes = plt.subplots(1, 2, figsize=(14, 4))
for ax, (metric, y_label, legend_options) in zip(axes, panels):
    # The slice starts at index label 1, so the first epoch (row 0) is not drawn.
    ax.plot(logs.loc[1:, metric], lw=2, label=f"training {metric}")
    ax.plot(logs.loc[1:, f"val_{metric}"], lw=2, label=f"validation {metric}")
    ax.set_xlabel("Epoch")
    ax.set_ylabel(y_label)
    ax.legend(**legend_options)
plt.show()

## Guardado del Modelo

In [None]:
# Persist the trained model to the working directory in the native Keras format.
model_path = "model.keras"
model.save(model_path)
print("Modelo guardado")

## Evaluación con Datos Suplementarios

In [None]:
# Extra labelled images, loaded with the same image size, batch size and
# label encoding as the training data.
supplementary_dir = "/kaggle/input/u-tad-dogs-vs-cats-2025/supplementary_data/supplementary_data"
supplementary_ds = keras.utils.image_dataset_from_directory(
    supplementary_dir,
    labels="inferred",
    label_mode="categorical",
    batch_size=batch_size,
    image_size=image_size,
)

# return_dict=True yields a {metric name: value} mapping instead of a list.
results = model.evaluate(supplementary_ds, verbose=1, return_dict=True)
supplementary_accuracy = results['accuracy']
print(f"\nSupplementary Accuracy: {supplementary_accuracy:.4f}")

## Generación de Predicciones

In [None]:
%%time

folder_path = "/kaggle/input/u-tad-dogs-vs-cats-2025/test/test"
predictions_dict = {}

for img in os.listdir(folder_path):
    img_path = os.path.join(folder_path, img)
    file_name = img_path.split('/')[-1]
    file_no_extension = file_name.split('.')[0]
    
    img_loaded = keras.utils.load_img(img_path, target_size=image_size)
    img_array = keras.utils.img_to_array(img_loaded)
    img_array = keras.ops.expand_dims(img_array, 0)
    
    prediction = model.predict(img_array, verbose=0)
    label = np.argmax(prediction)
    
    predictions_dict[int(file_no_extension)] = label

print(f"Predicciones generadas: {len(predictions_dict)}")

## Creación del Archivo de Submission

In [None]:
# Build the (id, label) table from the prediction mapping, ordered by ascending id.
submission = (
    pd.DataFrame(list(predictions_dict.items()), columns=["id", "label"])
    .sort_values(by="id", ascending=True)
)

# Write the submission file without the DataFrame index column.
submission.to_csv("submission.csv", index=False)

# Sanity check: how many images were assigned to each class, and the total.
label_counts = submission["label"].value_counts()
print("Distribución de predicciones:")
print(label_counts)
print(f"\nTotal: {len(submission)} imágenes")