In [None]:
#====================================================================================================#
#                                                                                                    #
#                                                        ██╗   ██╗   ████████╗ █████╗ ██████╗        #
#      Competición - INAR                                ██║   ██║   ╚══██╔══╝██╔══██╗██╔══██╗       #
#                                                        ██║   ██║█████╗██║   ███████║██║  ██║       #
#      created:        07/11/2025  -  05:00:00           ██║   ██║╚════╝██║   ██╔══██║██║  ██║       #
#      last change:    10/11/2025  -  11:34:43           ╚██████╔╝      ██║   ██║  ██║██████╔╝       #
#                                                         ╚═════╝       ╚═╝   ╚═╝  ╚═╝╚═════╝        #
#                                                                                                    #
#      Ismael Hernandez Clemente                         ismael.hernandez@live.u-tad.com             #
#                                                                                                    #
#      Github:                                           https://github.com/ismaelucky342            #
#                                                                                                    #
#====================================================================================================# 

# Competición Perretes y Gatos

## Iteración 9 - Migración a EfficientNetB3


In [None]:
# Fix protobuf compatibility issue
import os
os.environ['PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION'] = 'python'

# Imports
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import time

from tensorflow import data as tf_data
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, GlobalAveragePooling2D
from keras.applications import EfficientNetB3
from keras.callbacks import EarlyStopping, ModelCheckpoint

# Single global seed, applied through Keras and also directly to NumPy.
seed = 42
keras.utils.set_random_seed(seed)
np.random.seed(seed)

# Dataset locations on Kaggle (every split sits in a doubly nested folder).
DATASET_NAME = "u-tad-dogs-vs-cats-2025"
TRAIN_PATH = "/kaggle/input/{}/train/train".format(DATASET_NAME)
TEST_PATH = "/kaggle/input/{}/test/test".format(DATASET_NAME)
SUPP_PATH = "/kaggle/input/{}/supplementary_data/supplementary_data".format(DATASET_NAME)

print("Keras:", keras.__version__)

## Config 

In [None]:
# Hyperparameters for this iteration.
IMG_SIZE = 300            # native EfficientNetB3 resolution (preferred over 260)
BATCH_SIZE = 64
N_FINE_TUNE_LAYERS = 20   # more layers unfrozen for fine-tuning
EPOCHS_TL = 12            # reduced, relying on early stopping
EPOCHS_FT = 8             # reduced, relying on early stopping
DROPOUT_RATE = 0.3        # less dropout (larger model)
LABEL_SMOOTHING = 0.05    # reduced (0.1 was too much)
# MIXUP_ALPHA removed - too aggressive

# Print the configuration summary in one go (one line per setting).
print("\n".join([
    "Modelo: EfficientNetB3 (optimizado)",
    f"Resolución: {IMG_SIZE}x{IMG_SIZE} (nativa B3)",
    f"Batch size: {BATCH_SIZE}",
    f"Dropout: {DROPOUT_RATE}",
    f"Label Smoothing: {LABEL_SMOOTHING}",
    "Mixup: DISABLED (mejor sin él)",
    f"Fine-tune layers: {N_FINE_TUNE_LAYERS}",
    f"Épocas TL: {EPOCHS_TL}",
    f"Épocas FT: {EPOCHS_FT}",
]))

## Carga de Datos

Todo igual que antes, pero con `batch_size=64` (`BATCH_SIZE`) y las imágenes redimensionadas a 300x300 para EfficientNetB3.

In [None]:
# Arguments common to every image_dataset_from_directory call in this cell.
_shared_loader_args = dict(
    color_mode='rgb',
    batch_size=BATCH_SIZE,
    image_size=(IMG_SIZE, IMG_SIZE),
    seed=seed,
    interpolation='bilinear',
)

# Train and validation are the two subsets of the same seeded 80/20 split
# of TRAIN_PATH, so they must share seed and validation_split.
_split_loader_args = dict(
    labels='inferred',
    label_mode='binary',
    shuffle=True,
    validation_split=0.2,
    **_shared_loader_args,
)

train_dataset = keras.utils.image_dataset_from_directory(
    TRAIN_PATH, subset='training', **_split_loader_args
)

validation_dataset = keras.utils.image_dataset_from_directory(
    TRAIN_PATH, subset='validation', **_split_loader_args
)

# Unlabelled test images; shuffle is off so the order matches file_paths.
test_dataset = keras.utils.image_dataset_from_directory(
    TEST_PATH,
    labels=None,
    label_mode=None,
    shuffle=False,
    **_shared_loader_args,
)

# Labelled hold-out set, loaded without shuffling.
supplementary_dataset = keras.utils.image_dataset_from_directory(
    SUPP_PATH,
    labels='inferred',
    label_mode='binary',
    shuffle=False,
    **_shared_loader_args,
)

# Report the number of batches in each dataset.
for _split_name, _split_ds in (
    ("Train", train_dataset),
    ("Validation", validation_dataset),
    ("Test", test_dataset),
    ("Supplementary", supplementary_dataset),
):
    print(f"{_split_name} batches: {len(_split_ds)}")

## Mixup Augmentation

Técnica avanzada que mezcla imágenes y labels para mejor generalización. En esta iteración está **desactivada**: la celda siguiente solo lo indica.

In [None]:
# MIXUP REMOVED - it did not improve results and added overhead
# With EfficientNetB3 + more data, it is better to focus on image quality
print("Mixup disabled for better convergence")

## Data Augmentation

Versión menos agresiva que en iteraciones anteriores:
- Flip horizontal (sin vertical)
- Rotation ±10%
- Zoom ±10%
- Translation ±8%
- Contrast ±10%
- Brightness: no se aplica

In [None]:
# Tuned augmentation pipeline - milder than in earlier iterations.
_augmentation_layers = [
    keras.layers.RandomFlip("horizontal"),   # horizontal only (more natural)
    keras.layers.RandomRotation(0.1),        # reduced from 0.15
    keras.layers.RandomZoom(0.1),            # reduced from 0.15
    keras.layers.RandomTranslation(height_factor=0.08, width_factor=0.08),  # reduced
    keras.layers.RandomContrast(0.1),        # reduced
]
data_augmentation = keras.Sequential(_augmentation_layers, name="data_augmentation")


def _augment_batch(images, labels):
    """Apply the augmentation pipeline to the images; labels pass through unchanged."""
    return data_augmentation(images, training=True), labels


# No Mixup - only the standard augmentation is applied to the training set.
train_dataset_augmented = train_dataset.map(
    _augment_batch,
    num_parallel_calls=tf_data.AUTOTUNE,
)

print("Augmentation OPTIMIZADO (menos agresivo, sin Mixup)")

In [None]:
# Pre-trained EfficientNetB3 backbone: ImageNet weights, no classifier top,
# global average pooling applied to its output.
efficientnet_base = EfficientNetB3(
    include_top=False,
    weights='imagenet',
    pooling='avg',
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
)

# Freeze every backbone layer for the transfer-learning stage.
for backbone_layer in efficientnet_base.layers:
    backbone_layer.trainable = False

# Classification head stacked on the backbone (fewer/smaller Dense layers
# than before; the second dropout uses half the configured rate).
head_layers = [
    keras.layers.BatchNormalization(),
    Dense(256, activation='relu'),   # reduced from 512
    Dropout(DROPOUT_RATE),
    Dense(128, activation='relu'),   # reduced from 256
    Dropout(DROPOUT_RATE / 2),
    Dense(1, activation='sigmoid'),
]
efficientnet_model = Sequential([efficientnet_base, *head_layers])

# Initial compile: AdamW with a constant learning rate, label-smoothed
# binary cross-entropy, and accuracy/precision/recall metrics.
initial_lr = 1e-3
initial_optimizer = keras.optimizers.AdamW(
    learning_rate=initial_lr,
    weight_decay=1e-4,
)
initial_loss = keras.losses.BinaryCrossentropy(label_smoothing=LABEL_SMOOTHING)
initial_metrics = ['accuracy', keras.metrics.Precision(), keras.metrics.Recall()]
efficientnet_model.compile(
    optimizer=initial_optimizer,
    loss=initial_loss,
    metrics=initial_metrics,
)

efficientnet_model.summary()
print("EfficientNetB3 layers: {}".format(len(efficientnet_base.layers)))
print("Arquitectura OPTIMIZADA: Menos capas Dense, dropout reducido")

## Transfer Learning

Entreno solo las capas Dense que añadí, con EfficientNetB3 congelado.

**Hasta 12 épocas** (`EPOCHS_TL`) con early stopping (patience=3).

In [None]:
# Transfer-learning stage: only the layers left trainable are updated.
print("Starting Transfer Learning (optimized)...")

# Cosine decay from 1e-3 down to alpha * 1e-3, spread over every step of
# the planned TL epochs.
tl_total_steps = EPOCHS_TL * len(train_dataset_augmented)
cosine_decay_tl = keras.optimizers.schedules.CosineDecay(
    initial_learning_rate=1e-3,
    decay_steps=tl_total_steps,
    alpha=0.1,
)

# Recompile so the schedule is handed to the optimizer at construction time
# (rather than assigned afterwards).
tl_optimizer = keras.optimizers.AdamW(
    learning_rate=cosine_decay_tl,
    weight_decay=1e-4,
)
tl_loss = keras.losses.BinaryCrossentropy(label_smoothing=LABEL_SMOOTHING)
tl_metrics = ['accuracy', keras.metrics.Precision(), keras.metrics.Recall()]
efficientnet_model.compile(optimizer=tl_optimizer, loss=tl_loss, metrics=tl_metrics)

# Stop after 3 epochs without val_loss improvement and roll back to the best
# weights; separately keep on disk the model with the best val_accuracy.
# ReduceLROnPlateau was removed - incompatible with CosineDecay.
tl_early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)
tl_checkpoint = ModelCheckpoint(
    'best_model_tl.keras',
    monitor='val_accuracy',
    save_best_only=True,
    mode='max',
)

efficientnet_history_tl = efficientnet_model.fit(
    train_dataset_augmented,
    validation_data=validation_dataset,
    epochs=EPOCHS_TL,
    callbacks=[tl_early_stopping, tl_checkpoint],
)

print("Transfer Learning completed (with Cosine Decay)")

## Visualización TL

In [None]:
# Transfer-learning curves: loss, accuracy and precision/recall per epoch.
# Plotting is best-effort: a failure is reported but does not stop the notebook.
try:
    tl_history = efficientnet_history_tl.history

    # Keras auto-names metric objects with a running suffix ("precision",
    # "precision_1", "precision_2", ...) that grows with every new
    # Precision()/Recall() instance, i.e. with every compile() call or cell
    # re-run. Match by prefix instead of a fixed list so the training curves
    # are always found; validation keys start with "val_" and never match.
    precision_key = next((k for k in tl_history if k.startswith('precision')), None)
    recall_key = next((k for k in tl_history if k.startswith('recall')), None)

    plt.figure(figsize=(15, 5))

    # Panel 1: training vs. validation loss.
    plt.subplot(1, 3, 1)
    plt.plot(tl_history.get('loss', []), label='Train')
    plt.plot(tl_history.get('val_loss', []), label='Validation')
    plt.title('Loss - Transfer Learning B3')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)

    # Panel 2: training vs. validation accuracy.
    plt.subplot(1, 3, 2)
    plt.plot(tl_history.get('accuracy', []), label='Train')
    plt.plot(tl_history.get('val_accuracy', []), label='Validation')
    plt.title('Accuracy - Transfer Learning B3')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.grid(True)

    # Panel 3: training precision and recall (empty placeholders if absent).
    plt.subplot(1, 3, 3)
    if precision_key and recall_key:
        plt.plot(tl_history[precision_key], label='Precision')
        plt.plot(tl_history[recall_key], label='Recall')
    else:
        plt.plot([], [], label='Precision (n/a)')
        plt.plot([], [], label='Recall (n/a)')

    plt.title('Precision & Recall - Transfer Learning B3')
    plt.xlabel('Epoch')
    plt.ylabel('Score')
    plt.legend()
    plt.grid(True)

    plt.tight_layout()
    plt.show()
except Exception as e:
    print(f"Warning: Plotting failed ({e}). Continuing...")

## Evaluación Supplementary (post-TL)

Veo cómo va ANTES del fine-tuning.

In [None]:
# Score the model on the supplementary set after transfer learning.
print("Evaluating on supplementary dataset (post-TL)...")

efficientnet_supp_results_tl = efficientnet_model.evaluate(
    supplementary_dataset,
    verbose=1,
)
# Index 1 is the first metric passed to compile() ('accuracy'); index 0 is the loss.
efficientnet_supp_accuracy_tl = efficientnet_supp_results_tl[1]

print("Supplementary Accuracy (TL): {:.4f}".format(efficientnet_supp_accuracy_tl))

## Fine-tuning - DESCONGELAR 20 CAPAS

Ahora descongelo las **últimas 20 capas** de B3 (`N_FINE_TUNE_LAYERS`).

LR MUY bajo (3e-6, con Cosine Decay) para no romper lo pre-entrenado.

In [None]:
# Unfreeze the last N_FINE_TUNE_LAYERS backbone layers (20 instead of 15)
# and recompile with a much lower, cosine-decayed learning rate.
print(f"Unfreezing last {N_FINE_TUNE_LAYERS} layers...")

# Guard the slice: layers[-0:] would select the WHOLE backbone, so a value
# of 0 must explicitly mean "unfreeze nothing".
layers_to_unfreeze = (
    efficientnet_base.layers[-N_FINE_TUNE_LAYERS:] if N_FINE_TUNE_LAYERS > 0 else []
)
for layer in layers_to_unfreeze:
    # BatchNormalization layers stay frozen, as recommended by the Keras
    # EfficientNet fine-tuning guide: unfreezing them lets their moving
    # statistics be updated, which can undo what the pre-trained backbone
    # learned. Assigning the boolean also makes re-running this cell safe.
    layer.trainable = not isinstance(layer, keras.layers.BatchNormalization)

# Cosine decay for fine-tuning
cosine_decay_ft = keras.optimizers.schedules.CosineDecay(
    initial_learning_rate=3e-6,  # lower than before (was 5e-6)
    decay_steps=EPOCHS_FT * len(train_dataset_augmented),
    alpha=0.05
)

# Recompile (required after changing `trainable`) with AdamW + cosine decay
efficientnet_model.compile(
    optimizer=keras.optimizers.AdamW(
        learning_rate=cosine_decay_ft,
        weight_decay=3e-5  # less weight decay
    ),
    loss=keras.losses.BinaryCrossentropy(
        label_smoothing=LABEL_SMOOTHING
    ),
    metrics=['accuracy', keras.metrics.Precision(), keras.metrics.Recall()]
)

# Count how many backbone layers are now trainable.
trainable_count = sum([layer.trainable for layer in efficientnet_base.layers])
print(f"Trainable layers: {trainable_count}/{len(efficientnet_base.layers)}")
print(f"Optimizer: AdamW (lr=3e-6 Cosine Decay, weight_decay=3e-5)")

In [None]:
# Fine-tuning stage, run with the model as compiled in the previous cell.
print("Starting Fine-tuning (optimized)...")

# Stop after 2 epochs without val_loss improvement and roll back to the best
# weights; separately keep on disk the model with the best val_accuracy.
ft_early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=2,
    restore_best_weights=True,
)
ft_checkpoint = ModelCheckpoint(
    'best_model_ft.keras',
    monitor='val_accuracy',
    save_best_only=True,
    mode='max',
)

efficientnet_history_ft = efficientnet_model.fit(
    train_dataset_augmented,
    validation_data=validation_dataset,
    epochs=EPOCHS_FT,
    callbacks=[ft_early_stopping, ft_checkpoint],
)

print("Fine-tuning completed")

## Visualización Fine-tuning

In [None]:
# Fine-tuning curves: loss, accuracy and precision/recall per epoch.
# Plotting is best-effort: a failure is reported but does not stop the notebook.
try:
    ft_history = efficientnet_history_ft.history

    # Keras auto-names metric objects with a running suffix ("precision",
    # "precision_1", "precision_2", ...) that grows with every new
    # Precision()/Recall() instance. The model has been compiled several
    # times by this point, so a fixed list of names misses the actual keys;
    # match by prefix instead. Validation keys start with "val_" and never match.
    precision_key_ft = next((k for k in ft_history if k.startswith('precision')), None)
    recall_key_ft = next((k for k in ft_history if k.startswith('recall')), None)

    plt.figure(figsize=(15, 5))

    # Panel 1: training vs. validation loss.
    plt.subplot(1, 3, 1)
    plt.plot(ft_history.get('loss', []), label='Train')
    plt.plot(ft_history.get('val_loss', []), label='Validation')
    plt.title('Loss - Fine-tuning B3')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)

    # Panel 2: training vs. validation accuracy.
    plt.subplot(1, 3, 2)
    plt.plot(ft_history.get('accuracy', []), label='Train')
    plt.plot(ft_history.get('val_accuracy', []), label='Validation')
    plt.title('Accuracy - Fine-tuning B3')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.grid(True)

    # Panel 3: training precision and recall (empty placeholders if absent).
    plt.subplot(1, 3, 3)
    if precision_key_ft and recall_key_ft:
        plt.plot(ft_history[precision_key_ft], label='Precision')
        plt.plot(ft_history[recall_key_ft], label='Recall')
    else:
        plt.plot([], [], label='Precision (n/a)')
        plt.plot([], [], label='Recall (n/a)')

    plt.title('Precision & Recall - Fine-tuning B3')
    plt.xlabel('Epoch')
    plt.ylabel('Score')
    plt.legend()
    plt.grid(True)

    plt.tight_layout()
    plt.show()
except Exception as e:
    print(f"Warning: Plotting failed ({e}). Continuing...")

## Evaluación Final Supplementary (post-FT)

Evaluación final en supplementary dataset SIN augmentation.

In [None]:
# Score the fine-tuned model on the (un-augmented) supplementary set.
print("Evaluating on supplementary dataset (post-FT, NO augmentation)...")

efficientnet_supp_results_ft = efficientnet_model.evaluate(
    supplementary_dataset,
    verbose=1,
)
# Index 1 is the first metric passed to compile() ('accuracy'); index 0 is the loss.
efficientnet_supp_accuracy_ft = efficientnet_supp_results_ft[1]

print("Supplementary Accuracy (FT): {:.4f}".format(efficientnet_supp_accuracy_ft))
# Heuristic range printed by the author: accuracy + 0.01 up to accuracy + 0.03.
print(
    "Expected Kaggle score: ~{:.4f} - {:.4f}".format(
        efficientnet_supp_accuracy_ft + 0.01,
        efficientnet_supp_accuracy_ft + 0.03,
    )
)

## Test-Time Augmentation (TTA)

Aplicamos múltiples augmentations a cada imagen de test y promediamos las predicciones para mayor robustez.

In [None]:
# TTA configuration
N_TTA = 10  # augmented passes per image (10 = good accuracy/time balance)

print("Model: EfficientNetB3")
print("Resolution: {0}x{0}".format(IMG_SIZE))
print("TTA enabled: {} augmentations per image".format(N_TTA))
print("=" * 60)

# Gentler augmentation for TTA (slight changes only).
_tta_layers = [
    keras.layers.RandomFlip("horizontal"),
    keras.layers.RandomRotation(0.05),           # very slight
    keras.layers.RandomZoom(0.05),               # very slight
    keras.layers.RandomTranslation(0.03, 0.03),  # very slight
]
tta_augmentation = keras.Sequential(_tta_layers, name="tta_augmentation")

# Función para aplicar TTA
def predict_with_tta(model, dataset, n_tta=N_TTA):
    """
    Average predictions over one clean pass plus several augmented passes
    (Test-Time Augmentation).

    Args:
        model: Trained model; its ``predict`` method is called once per pass.
        dataset: Test dataset. It must support ``map`` and yield a single
            tensor of images per element (the mapping function takes one argument).
        n_tta: Number of augmented passes made in addition to the clean pass.

    Returns:
        Element-wise mean of the ``n_tta + 1`` prediction arrays.
    """
    total_passes = n_tta + 1
    print(f"Starting TTA predictions ({n_tta} augmentations)...")

    def _augment(images):
        # The random layers are only active when called with training=True.
        return tta_augmentation(images, training=True)

    # Pass 1: un-augmented baseline.
    all_passes = [model.predict(dataset, verbose=0)]
    print(f"✓ Baseline prediction (1/{total_passes}) completed")

    # Passes 2..total_passes: predictions on a freshly mapped, augmented dataset.
    for pass_number in range(2, total_passes + 1):
        noisy_dataset = dataset.map(
            _augment,
            num_parallel_calls=tf_data.AUTOTUNE,
        )
        all_passes.append(model.predict(noisy_dataset, verbose=0))
        print(f"✓ TTA prediction ({pass_number}/{total_passes}) completed")

    # Mean across passes (axis 0 indexes the pass).
    averaged = np.mean(all_passes, axis=0)

    print(f"TTA completed: {total_passes} predictions averaged")
    print("=" * 60)

    return averaged

# Run TTA on the test set and print basic statistics of the averaged scores.
predictions = predict_with_tta(efficientnet_model, test_dataset, n_tta=N_TTA)

print("Total predictions: {}".format(len(predictions)))
print("Prediction range: [{:.4f}, {:.4f}]".format(predictions.min(), predictions.max()))
print("Prediction mean: {:.4f}".format(predictions.mean()))

In [None]:
# Build submission.csv from the TTA-averaged predictions.
test_filenames = test_dataset.file_paths


def _image_id(path):
    """Return the integer id encoded in the file name (base name without extension)."""
    stem, _extension = os.path.splitext(os.path.basename(path))
    return int(stem)


ids = [_image_id(path) for path in test_filenames]

# Standard 0.5 threshold; adjust if the class balance below looks biased.
predictions_binary = (predictions > 0.5).astype(int).flatten()

submission_df = pd.DataFrame({'id': ids, 'label': predictions_binary}).sort_values('id')

# Final statistics: label counts and their share of the total.
n_samples = len(submission_df)
n_label_0 = (submission_df['label'] == 0).sum()
n_label_1 = (submission_df['label'] == 1).sum()
separator = "=" * 60

print(separator)
print("SUBMISSION STATISTICS")
print(separator)
print(f"Total samples: {n_samples}")
print(f"Class 0 (Cat): {n_label_0} ({n_label_0 / n_samples * 100:.2f}%)")
print(f"Class 1 (Dog): {n_label_1} ({n_label_1 / n_samples * 100:.2f}%)")
print(separator)

submission_df.to_csv('submission.csv', index=False)
print("✓ Submission saved to submission.csv")
print(submission_df.head(10))