In [None]:
#====================================================================================================#
#                                                                                                    #
#                                                        ██╗   ██╗   ████████╗ █████╗ ██████╗        #
#      Competición - INAR                                ██║   ██║   ╚══██╔══╝██╔══██╗██╔══██╗       #
#                                                        ██║   ██║█████╗██║   ███████║██║  ██║       #
#      created:        29/10/2025  -  23:00:15           ██║   ██║╚════╝██║   ██╔══██║██║  ██║       #
#      last change:    05/11/2025  -  02:55:40           ╚██████╔╝      ██║   ██║  ██║██████╔╝       #
#                                                         ╚═════╝       ╚═╝   ╚═╝  ╚═╝╚═════╝        #
#                                                                                                    #
#      Ismael Hernandez Clemente                         ismael.hernandez@live.u-tad.com             #
#                                                                                                    #
#      Github:                                           https://github.com/ismaelucky342            #
#                                                                                                    #
#====================================================================================================#

# Iteración 4 - Transfer Learning VGG16 

Transfer learning con VGG16 preentrenado en ImageNet, con la salida corregida a clasificación binaria (una sola neurona sigmoide). Todas las capas convolucionales están congeladas; solo se entrena la cabecera personalizada.

**Arquitectura**: Data augmentation + VGG16 (congelado, `pooling='avg'`) + Dense(256, ReLU) + Dropout(0.5) + Dense(1, sigmoid)

**Kaggle Score**: 0.86380 (Posición #7)

In [None]:
# Imports básicos y configuración de rutas
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from tensorflow import data as tf_data
import keras

# Seed Keras' random number generation once, up front; the same seed is
# reused later for the train/validation split.
seed = 42
keras.utils.set_random_seed(seed)

# All competition data hangs off one Kaggle input folder; every split sits in
# a doubly nested directory of the same name.
DATASET_NAME = "u-tad-dogs-vs-cats-2025"
_DATASET_ROOT = f"/kaggle/input/{DATASET_NAME}"
TRAIN_PATH = f"{_DATASET_ROOT}/train/train"
TEST_PATH = f"{_DATASET_ROOT}/test/test"
SUPP_PATH = f"{_DATASET_ROOT}/supplementary_data/supplementary_data"

print(f"Versión de Keras: {keras.__version__}")

## Carga y Preparación de Datos

In [None]:
# Load the training folder as a binary-labelled dataset, holding out 20% of
# the images for validation. 224x224 is the resolution used for the VGG16
# backbone throughout this notebook.
image_size = (224, 224)
batch_size = 125

# Options shared by both subsets; label_mode="binary" yields a single 0/1
# target per image, matching the one-unit sigmoid output of the model.
_loader_options = dict(
    labels="inferred",
    label_mode="binary",
    image_size=image_size,
    batch_size=batch_size,
)

train_ds, val_ds = keras.utils.image_dataset_from_directory(
    TRAIN_PATH,
    validation_split=0.2,
    subset="both",  # return (train, validation) in a single call
    seed=seed,
    **_loader_options,
)

for split_name, split_ds in (("Training", train_ds), ("Validation", val_ds)):
    print(f"{split_name} batches: {len(split_ds)}")

## Data Augmentation más "conservador"

In [None]:
# Conservative augmentation: only small geometric perturbations, so the
# images are not distorted too much.
_augmentation_layers = [
    keras.layers.RandomFlip("horizontal"),
    keras.layers.RandomRotation(0.1),
    keras.layers.RandomZoom(0.1),
    keras.layers.RandomTranslation(height_factor=0.1, width_factor=0.1),
]
data_augmentation = keras.Sequential(_augmentation_layers, name="data_augmentation")

print("Data Augmentation configurado")

## Construcción del Modelo Transfer Learning

In [None]:
# Load ImageNet-pretrained VGG16, freeze every layer of it, and stack a small
# binary classification head on top.
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.applications import VGG16


@keras.saving.register_keras_serializable(package="inar")
class VGG16Preprocessing(keras.layers.Layer):
    """Apply the input preprocessing expected by the VGG16 ImageNet weights.

    The datasets in this notebook yield raw RGB pixels in [0, 255], while the
    pretrained VGG16 weights expect `keras.applications.vgg16.preprocess_input`
    to have been applied (RGB -> BGR conversion and per-channel ImageNet mean
    subtraction, per the Keras Applications documentation). Keeping this step
    inside the model means training, evaluation, the saved model and
    single-image prediction all receive identical preprocessing.

    The layer has no weights. It is registered as serializable so that
    `model.save(...)` can record it; loading the saved model in a fresh
    session requires this class to be defined first.
    """

    def call(self, inputs):
        return keras.applications.vgg16.preprocess_input(inputs)


input_shape = image_size + (3,)

# Convolutional base only (no ImageNet classifier). pooling='avg' makes the
# base output one global-average-pooled feature vector per image.
base_model = VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=input_shape,
    pooling='avg'
)

# Freeze the whole VGG16 base: only the custom head below is trained.
base_model.trainable = False

# Full model: augmentation -> VGG16 preprocessing -> frozen base -> custom head.
# Augmentation runs on raw pixel values, before the channel reordering and
# mean subtraction.
model = Sequential([
    keras.Input(shape=input_shape),
    data_augmentation,
    VGG16Preprocessing(name="vgg16_preprocessing"),
    base_model,
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
], name='VGG16_Transfer_Learning')

print(f"Capas VGG16 congeladas: {len(base_model.layers)}")
model.summary()

## Compilación y Entrenamiento

In [None]:
# Compilo con Adam lr bajo y añado ReduceLROnPlateau
%%time

model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.0001),
    loss='binary_crossentropy',
    metrics=['accuracy', 'precision', 'recall']
)

epochs = 15

reduce_lr = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=3,
    min_lr=1e-7,
    verbose=1
)

print(f"Épocas: {epochs}")
print(f"Optimizer: Adam (lr=0.0001)")
print("-" * 60)

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=epochs,
    callbacks=[reduce_lr],
    verbose=1
)

print("-" * 60)
print(f"Val Accuracy final: {history.history['val_accuracy'][-1]:.4f}")
print(f"Val Precision final: {history.history['val_precision'][-1]:.4f}")
print(f"Val Recall final: {history.history['val_recall'][-1]:.4f}")

## Visualización de Curvas

In [None]:
# Plot the training and validation curves to check for overfitting.
logs = pd.DataFrame(history.history)

# The curves start at index label 1, i.e. the first recorded epoch (label 0)
# is left out of the plots.
curves = logs.loc[1:]

fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(14, 4))

# Left panel: loss.
ax_loss.plot(curves["loss"], lw=2, label='Pérdida en entrenamiento')
ax_loss.plot(curves["val_loss"], lw=2, label='Pérdida en validación')
ax_loss.set_xlabel("Época")
ax_loss.set_ylabel("Pérdida")
ax_loss.legend()
ax_loss.grid(True, alpha=0.3)

# Right panel: accuracy.
ax_acc.plot(curves["accuracy"], lw=2, label='Precisión en entrenamiento')
ax_acc.plot(curves["val_accuracy"], lw=2, label='Precisión en validación')
ax_acc.set_xlabel("Época")
ax_acc.set_ylabel("Precisión")
ax_acc.legend(loc='lower right')
ax_acc.grid(True, alpha=0.3)

fig.tight_layout()
plt.show()

# Last-epoch accuracy on both splits, taken from the full (unsliced) log.
final_epoch = logs.iloc[-1]
print(f"\nPrecisión final en entrenamiento: {final_epoch['accuracy']:.4f}")
print(f"Precisión final en validación: {final_epoch['val_accuracy']:.4f}")

## Guardado del Modelo

In [None]:
# Persist the trained model to disk in the `.keras` format.
model_path = "model.keras"
model.save(model_path)
print(f"Modelo guardado como '{model_path}'")

## Evaluación con Datos Suplementarios

In [None]:
# Evaluate on the supplementary data to get a more realistic metric than the
# validation split alone.
supplementary_ds = keras.utils.image_dataset_from_directory(
    SUPP_PATH,
    labels="inferred",
    label_mode="binary",
    image_size=image_size,
    batch_size=batch_size,
)

print("Evaluando con datos suplementarios...")
results = model.evaluate(supplementary_ds, return_dict=True, verbose=1)

# Blank separator line, then one line per metric tracked at compile time.
print()
for metric_key in ("accuracy", "precision", "recall"):
    print(f"Supplementary {metric_key.capitalize()}: {results[metric_key]:.4f}")

## Generación de Predicciones

In [None]:
# Genero predicciones imagen por imagen para el test
%%time

predictions_dict = {}

print(f"Generando predicciones para {len(os.listdir(TEST_PATH))} imágenes...")

for img in os.listdir(TEST_PATH):
    img_path = os.path.join(TEST_PATH, img)
    file_name = img_path.split('/')[-1]
    file_no_extension = file_name.split('.')[0]
    
    img_loaded = keras.utils.load_img(img_path, target_size=image_size)
    img_array = keras.utils.img_to_array(img_loaded)
    img_array = keras.ops.expand_dims(img_array, 0)
    
    prediction = model.predict(img_array, verbose=0)[0][0]
    label = 1 if prediction >= 0.5 else 0
    
    predictions_dict[int(file_no_extension)] = label

print(f"Predicciones completadas: {len(predictions_dict)}")

## Creación del Archivo de Submission

In [None]:
# Build the submission CSV (sorted by id) and sanity-check the predicted
# label distribution before uploading.
submission = (
    pd.DataFrame(predictions_dict.items(), columns=["id", "label"])
    .sort_values(by='id', ascending=True)
)
submission.to_csv('submission.csv', index=False)

banner = "=" * 60
print(banner)
print("ARCHIVO DE SUBMISSION CREADO")
print(banner)
print("\nDistribución de predicciones:")
print(submission["label"].value_counts())

# Count each class once and reuse the numbers for both the report and the check.
n_label_0 = (submission['label'] == 0).sum()
n_label_1 = (submission['label'] == 1).sum()
total = len(submission)

print(f"\nClase 0 (Cat): {n_label_0} imágenes")
print(f"Clase 1 (Dog): {n_label_1} imágenes")
print(f"Total: {total} imágenes")

# A submission where every row has the same label almost certainly means the
# model collapsed to one class, so warn instead of reporting success.
has_both_classes = n_label_0 != total and n_label_1 != total
if has_both_classes:
    print("\nDistribución OK - Listo para enviar")
else:
    print("\nALERTA: Todas las predicciones son de una sola clase")