In [None]:
#====================================================================================================#
#                                                                                                    #
#                                                        ██╗   ██╗   ████████╗ █████╗ ██████╗        #
#      Competición - INAR                                ██║   ██║   ╚══██╔══╝██╔══██╗██╔══██╗       #
#                                                        ██║   ██║█████╗██║   ███████║██║  ██║       #
#      created:        29/10/2025  -  23:00:15           ██║   ██║╚════╝██║   ██╔══██║██║  ██║       #
#      last change:    30/10/2025  -  02:55:40           ╚██████╔╝      ██║   ██║  ██║██████╔╝       #
#                                                         ╚═════╝       ╚═╝   ╚═╝  ╚═╝╚═════╝        #
#                                                                                                    #
#      Ismael Hernandez Clemente                         ismael.hernandez@live.u-tad.com             #
#                                                                                                    #
#      Github:                                           https://github.com/ismaelucky342            #
#                                                                                                    #
#====================================================================================================#



# Gatos vs Perretes 

Idea de diseño:
- **Transfer Learning** con EfficientNet-B3, siguiendo el enfoque de este vídeo: https://www.youtube.com/watch?v=fCtMf6qHtdk
- **K-Fold con validación cruzada** (5 folds) para mejor generalización
- **Entrenamiento por etapas**: primero solo la cabeza, luego fine-tuning completo
- **Data Augmentation** con Albumentations
- **Mixed Precision Training** para acelerar el entrenamiento

In [None]:
# Importo las librerías
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import warnings
warnings.filterwarnings('ignore')

# PyTorch y movidas varias
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.cuda.amp import autocast, GradScaler

# modelos preentrenados con transfer
import timm

# Data augmentation y transformaciones
import albumentations as A
from albumentations.pytorch import ToTensorV2

# K-Fold validación cruzada 
from sklearn.model_selection import StratifiedKFold

# Seed every random number generator the pipeline can draw from.
# `random` is imported here because the notebook's import list does not include it.
import random

seed = 42
random.seed(seed)                 # Python's built-in RNG (was previously left unseeded)
np.random.seed(seed)              # NumPy global RNG
torch.manual_seed(seed)           # PyTorch CPU RNG
torch.cuda.manual_seed(seed)      # current CUDA device
torch.cuda.manual_seed_all(seed)  # all CUDA devices
# Request deterministic cuDNN kernels and disable the cuDNN auto-tuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Usando dispositivo: {device}")

## 1. Configuración Global
**[v1.3 - 30/10/2025 00:45 AM]** - Actualizado con Mixed Precision y AdamW

Aquí defino los hiperparámetros principales del modelo y las rutas de los datos.

In [None]:
# Global configuration: data locations and training hyperparameters
CONFIG = {
    # Kaggle input directories
    'train_dir': '/kaggle/input/u-tad-dogs-vs-cats-2025/train/train',
    'test_dir': '/kaggle/input/u-tad-dogs-vs-cats-2025/test/test',
    'supplementary_dir': '/kaggle/input/u-tad-dogs-vs-cats-2025/supplementary_data/supplementary_data',

    # Network definition
    'model_name': 'efficientnet_b3',
    'img_size': 300,
    'num_classes': 2,

    # Optimisation schedule (stage 1 = head only, stage 2 = full fine-tuning)
    'batch_size': 32,
    'num_folds': 5,
    'epochs_stage1': 5,
    'epochs_stage2': 15,
    'lr_stage1': 1e-3,
    'lr_stage2': 1e-4,
    'weight_decay': 1e-2,
    'label_smoothing': 0.1,

    # Data loading and reproducibility
    'num_workers': 2,
    'seed': 42,
}

# Echo every setting so the run log records the exact configuration used
print("Configuración cargada:")
print("\n".join(f"   {name}: {setting}" for name, setting in CONFIG.items()))

## Preparación del Dataset

Creo un dataset personalizado de PyTorch y preparo los datos para validación cruzada K-Fold.

In [None]:
# Custom dataset for the labelled training/validation images
class DogsVsCatsDataset(Dataset):
    """Map-style dataset pairing image file paths with their class labels.

    Args:
        image_paths: indexable collection of image file paths.
        labels: indexable collection of labels, aligned with ``image_paths``.
        transforms: optional callable invoked as ``transforms(image=array)``
            whose result is indexed with ``'image'`` (the Albumentations
            calling convention used in this notebook).
    """

    def __init__(self, image_paths, labels, transforms=None):
        self.image_paths, self.labels, self.transforms = image_paths, labels, transforms

    def __len__(self):
        """Number of samples, i.e. the number of image paths."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        The image is opened with PIL, forced to RGB and converted to a NumPy
        array before the optional transform pipeline is applied.
        """
        pixels = np.array(Image.open(self.image_paths[idx]).convert('RGB'))

        if self.transforms:
            pixels = self.transforms(image=pixels)['image']

        return pixels, self.labels[idx]

# Collect the training image paths and derive their labels from the file names
def prepare_data(train_dir):
    """Scan ``train_dir`` for ``.jpg`` files and build path/label arrays.

    Labels come from the file name: names starting with ``'cat'`` get label 0,
    every other ``.jpg`` file gets label 1.

    File names are sorted before use. ``os.listdir`` returns entries in
    arbitrary order, and the seeded K-Fold split depends on sample order, so
    sorting makes the folds reproducible across machines and runs.

    Args:
        train_dir: directory containing the training images.

    Returns:
        Tuple ``(image_paths, labels)`` of NumPy arrays with one entry per
        image. ``labels`` is always ``int64`` (also when no image is found),
        which is the integer type PyTorch classification losses expect.
    """
    filenames = sorted(f for f in os.listdir(train_dir) if f.endswith('.jpg'))

    image_paths = [os.path.join(train_dir, f) for f in filenames]
    labels = [0 if f.startswith('cat') else 1 for f in filenames]

    return np.array(image_paths), np.array(labels, dtype=np.int64)

# Load the full training set once and report the class balance
train_paths, train_labels = prepare_data(CONFIG['train_dir'])
print(f"Datos cargados: {len(train_paths)} imágenes")
print(f"   - Gatos: {np.count_nonzero(train_labels == 0)}")
print(f"   - Perros: {np.count_nonzero(train_labels == 1)}")

## Transformaciones y Data Augmentation
**[v1.5 - 30/10/2025 02:20 AM]** - Augmentation mejorado con ShiftScaleRotate

Defino las transformaciones de entrenamiento con augmentation y las de validación/test sin augmentation.

In [None]:
# Training pipeline: random augmentation, then ImageNet normalisation and tensor conversion.
# NOTE(review): the accepted keyword arguments of some Albumentations transforms
# (e.g. RandomResizedCrop) differ between library versions — confirm against the
# installed version.
train_transforms = A.Compose([
    # Random crop covering 80-100% of the image, resized to img_size x img_size
    A.RandomResizedCrop(height=CONFIG['img_size'], width=CONFIG['img_size'], scale=(0.8, 1.0)),
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
    A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=15, p=0.5),
    # Mean/std values are the standard ImageNet statistics
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2()
])

# Validation/test pipeline: deterministic resize plus the same normalisation, no augmentation
val_transforms = A.Compose([
    A.Resize(height=CONFIG['img_size'], width=CONFIG['img_size']),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2()
])

# Summary of the two pipelines for the run log
print("Transformaciones definidas")
print(f"   - Train: RandomCrop, HorizontalFlip, Brightness/Contrast, Rotation")
print(f"   - Val/Test: Solo resize y normalización")

## Modelo con Transfer Learning
**[v1.0 - 29/10/2025 23:15 PM]** - Implementado Transfer Learning con EfficientNet-B3

Cargo EfficientNet-B3 preentrenado y lo adapto para clasificación binaria (perros vs gatos).

In [None]:
# Build the transfer-learning model
def create_model(model_name, num_classes, pretrained=True):
    """Create a ``timm`` model with a ``num_classes``-way classification head.

    Args:
        model_name: architecture name passed to ``timm.create_model``.
        num_classes: number of output classes.
        pretrained: forwarded to ``timm.create_model`` as ``pretrained``.

    Returns:
        The model object returned by ``timm.create_model``.
    """
    return timm.create_model(model_name, pretrained=pretrained, num_classes=num_classes)

# Freeze the backbone for stage 1 (head-only training)
def freeze_backbone(model):
    """Disable gradients for every parameter whose name lacks ``'classifier'``.

    Parameters whose qualified name contains ``'classifier'`` are left
    untouched. The model is modified in place and also returned.
    """
    backbone_weights = (
        weight for qualified_name, weight in model.named_parameters()
        if 'classifier' not in qualified_name
    )
    for weight in backbone_weights:
        weight.requires_grad = False
    return model

# Unfreeze everything for stage 2 (full fine-tuning)
def unfreeze_backbone(model):
    """Re-enable gradients for every parameter of ``model``.

    The model is modified in place and also returned.
    """
    every_weight = list(model.parameters())
    for weight in every_weight:
        weight.requires_grad = True
    return model

# Instantiate one model up front to report its size
model = create_model(CONFIG['model_name'], CONFIG['num_classes'], pretrained=True).to(device)

print(f"Modelo creado: {CONFIG['model_name']}")
print(f"   - Parámetros totales: {sum(weight.numel() for weight in model.parameters()):,}")
print(f"   - Parámetros entrenables: {sum(weight.numel() for weight in model.parameters() if weight.requires_grad):,}")

## Funciones de Entrenamiento y Validación
**[v1.3 - 30/10/2025 00:45 AM]** - Añadido Mixed Precision Training (AMP)

Implemento las funciones para entrenar y validar el modelo con mixed precision training.

In [None]:
# Mixed-precision training pass
def train_epoch(model, dataloader, criterion, optimizer, scaler, device):
    """Train ``model`` for one pass over ``dataloader`` with AMP.

    The forward pass and loss run inside ``autocast()``; the backward pass and
    optimizer step go through ``scaler``.

    Args:
        model: network to train (switched to training mode).
        dataloader: iterable yielding ``(images, labels)`` batches.
        criterion: loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: optimizer stepped once per batch.
        scaler: gradient scaler providing ``scale``, ``step`` and ``update``.
        device: device the batches are moved to.

    Returns:
        ``(epoch_loss, epoch_acc)``: the sample-weighted mean loss and the
        accuracy in percent.
    """
    model.train()
    loss_sum, hits, seen = 0.0, 0, 0

    for batch_images, batch_labels in dataloader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()
        with autocast():
            logits = model(batch_images)
            batch_loss = criterion(logits, batch_labels)

        # Scaled backward pass, optimizer step, then scale-factor update
        scaler.scale(batch_loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # Weight the batch loss by batch size so the epoch mean is per-sample
        loss_sum += batch_loss.item() * batch_images.size(0)
        top_class = logits.max(1)[1]
        seen += batch_labels.size(0)
        hits += top_class.eq(batch_labels).sum().item()

    return loss_sum / seen, 100. * hits / seen

# Validation pass
def validate_epoch(model, dataloader, criterion, device):
    """Evaluate ``model`` on ``dataloader`` without tracking gradients.

    Args:
        model: network to evaluate (switched to eval mode).
        dataloader: iterable yielding ``(images, labels)`` batches.
        criterion: loss callable invoked as ``criterion(outputs, labels)``.
        device: device the batches are moved to.

    Returns:
        ``(epoch_loss, epoch_acc)``: the sample-weighted mean loss and the
        accuracy in percent.
    """
    model.eval()
    loss_sum, hits, seen = 0.0, 0, 0

    with torch.no_grad():
        for batch_images, batch_labels in dataloader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            logits = model(batch_images)
            batch_loss = criterion(logits, batch_labels)

            # Weight the batch loss by batch size so the epoch mean is per-sample
            loss_sum += batch_loss.item() * batch_images.size(0)
            top_class = logits.max(1)[1]
            seen += batch_labels.size(0)
            hits += top_class.eq(batch_labels).sum().item()

    return loss_sum / seen, 100. * hits / seen

print("Funciones de entrenamiento y validación definidas")

## Entrenamiento con K-Fold Cross-Validation

Commits realizados:

**[v1.1 - 30/10/2025 00:10 AM]** - Implementado K-Fold (5 folds)  
**[v1.2 - 30/10/2025 00:30 AM]** - Añadido Early Stopping y Label Smoothing

Entreno el modelo con 5 folds y 2 etapas por fold:
1. **Etapa 1**: Solo entreno la cabeza con el backbone congelado
2. **Etapa 2**: Fine-tuning completo descongelando todo el modelo

In [None]:
# K-Fold cross-validation: one model per fold, each trained in two stages
# (stage 1 = classifier head only, stage 2 = full fine-tuning with early stopping).
skf = StratifiedKFold(n_splits=CONFIG['num_folds'], shuffle=True, random_state=CONFIG['seed'])

fold_models = []   # best model of every fold, kept for ensemble inference
fold_metrics = []  # per-fold summary dictionaries

print(f"Iniciando entrenamiento con {CONFIG['num_folds']} folds\n")

for fold, (train_idx, val_idx) in enumerate(skf.split(train_paths, train_labels)):
    print(f"{'='*60}")
    print(f"FOLD {fold + 1}/{CONFIG['num_folds']}")
    print(f"{'='*60}")

    # Split paths/labels for this fold
    fold_train_paths = train_paths[train_idx]
    fold_train_labels = train_labels[train_idx]
    fold_val_paths = train_paths[val_idx]
    fold_val_labels = train_labels[val_idx]

    train_dataset = DogsVsCatsDataset(fold_train_paths, fold_train_labels, transforms=train_transforms)
    val_dataset = DogsVsCatsDataset(fold_val_paths, fold_val_labels, transforms=val_transforms)

    train_loader = DataLoader(train_dataset, batch_size=CONFIG['batch_size'],
                              shuffle=True, num_workers=CONFIG['num_workers'], pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=CONFIG['batch_size'],
                            shuffle=False, num_workers=CONFIG['num_workers'], pin_memory=True)

    # Every fold starts from a fresh pretrained network
    model = create_model(CONFIG['model_name'], CONFIG['num_classes'], pretrained=True)
    model = model.to(device)

    # STAGE 1: train only the classifier head
    print(f"\nEtapa 1: Entrenando solo la cabeza ({CONFIG['epochs_stage1']} epochs)")
    model = freeze_backbone(model)

    criterion = nn.CrossEntropyLoss(label_smoothing=CONFIG['label_smoothing'])
    # Only hand the still-trainable parameters to the optimizer
    optimizer = optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()),
                            lr=CONFIG['lr_stage1'], weight_decay=CONFIG['weight_decay'])
    scaler = GradScaler()

    for epoch in range(CONFIG['epochs_stage1']):
        train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, scaler, device)
        val_loss, val_acc = validate_epoch(model, val_loader, criterion, device)

        print(f"Epoch {epoch+1}/{CONFIG['epochs_stage1']} - "
              f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}% | "
              f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%")

    # STAGE 2: full fine-tuning
    print(f"\nEtapa 2: Fine-tuning completo ({CONFIG['epochs_stage2']} epochs)")
    model = unfreeze_backbone(model)

    # New optimizer so the freshly unfrozen backbone parameters are included
    optimizer = optim.AdamW(model.parameters(), lr=CONFIG['lr_stage2'],
                            weight_decay=CONFIG['weight_decay'])

    # Reset the best-checkpoint bookkeeping for every fold so a fold can never
    # pick up (or crash on) state left over from a previous one.
    best_val_acc = 0.0
    best_val_loss = float('nan')
    best_model_state = None
    patience_counter = 0
    patience_limit = CONFIG.get('patience', 5)  # optional config key, defaults to 5

    for epoch in range(CONFIG['epochs_stage2']):
        train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, scaler, device)
        val_loss, val_acc = validate_epoch(model, val_loader, criterion, device)

        print(f"Epoch {epoch+1}/{CONFIG['epochs_stage2']} - "
              f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}% | "
              f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%")

        if best_model_state is None or val_acc > best_val_acc:
            best_val_acc = val_acc
            best_val_loss = val_loss
            # state_dict() holds references to the live tensors, so a shallow
            # dict copy would keep changing as training continues. Clone each
            # tensor (onto the CPU) to get a real snapshot of the best epoch.
            best_model_state = {name: tensor.detach().cpu().clone()
                                for name, tensor in model.state_dict().items()}
            patience_counter = 0
            print(f"   -> Nuevo mejor modelo (Val Acc: {val_acc:.2f}%)")
        else:
            patience_counter += 1

        if patience_counter >= patience_limit:
            print(f"   -> Early stopping en epoch {epoch+1}")
            break

    # Restore the weights of the best stage-2 epoch (skipped if stage 2 never ran)
    if best_model_state is not None:
        model.load_state_dict(best_model_state)

    fold_models.append(model)
    fold_metrics.append({
        'fold': fold + 1,
        'best_val_acc': best_val_acc,
        # Validation loss of the restored (best) epoch, not of the last epoch
        'val_loss': best_val_loss
    })

    print(f"\nFold {fold + 1} completado - Mejor Val Acc: {best_val_acc:.2f}%\n")

# Summary over all folds
print(f"\n{'='*60}")
print("RESUMEN DE ENTRENAMIENTO")
print(f"{'='*60}")
for metric in fold_metrics:
    print(f"Fold {metric['fold']}: Val Acc = {metric['best_val_acc']:.2f}%")

avg_val_acc = np.mean([m['best_val_acc'] for m in fold_metrics])
print(f"\nAccuracy promedio en validación: {avg_val_acc:.2f}%")
print(f"{'='*60}\n")

## Inferencia en Test Set

Cargo las imágenes de test y genero predicciones promediando los 5 modelos de los folds.

In [None]:
# Dataset for the unlabelled test images
class TestDataset(Dataset):
    """Map-style dataset over the ``.jpg`` files found in ``test_dir``.

    File names are sorted as strings (lexicographically). Each item is
    ``(image, img_id)``, where ``img_id`` is the integer parsed from the part
    of the file name before the first dot.

    Args:
        test_dir: directory containing the test images.
        transforms: optional callable invoked as ``transforms(image=array)``
            whose result is indexed with ``'image'``.
    """

    def __init__(self, test_dir, transforms=None):
        self.test_dir = test_dir
        self.transforms = transforms
        jpg_names = [entry for entry in os.listdir(test_dir) if entry.endswith('.jpg')]
        jpg_names.sort()
        self.image_files = jpg_names

    def __len__(self):
        """Number of test images discovered in ``test_dir``."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Load image ``idx`` and return it together with its numeric id."""
        filename = self.image_files[idx]
        full_path = os.path.join(self.test_dir, filename)

        pixels = np.array(Image.open(full_path).convert('RGB'))

        if self.transforms:
            pixels = self.transforms(image=pixels)['image']

        # The id is the file name stem, e.g. "123.jpg" -> 123
        return pixels, int(filename.split('.')[0])

# Test images go through the deterministic validation pipeline, in fixed order
test_dataset = TestDataset(CONFIG['test_dir'], transforms=val_transforms)
test_loader = DataLoader(
    test_dataset,
    batch_size=CONFIG['batch_size'],
    shuffle=False,
    num_workers=CONFIG['num_workers'],
    pin_memory=True,
)

print(f"Dataset de test cargado: {len(test_dataset)} imágenes")

In [None]:
# Ensemble inference: average the class-1 (dog) probability over all fold models
print("Generando predicciones...")

all_predictions, all_ids = [], []

# Put every fold model in eval mode before predicting
for fold_model in fold_models:
    fold_model.eval()

with torch.no_grad():
    for images, img_ids in test_loader:
        images = images.to(device)

        # One probability vector per fold model for this batch
        fold_preds = [
            torch.softmax(fold_model(images), dim=1)[:, 1].cpu().numpy()
            for fold_model in fold_models
        ]

        # Average across models, keeping one value per image
        all_predictions.extend(np.mean(fold_preds, axis=0))
        all_ids.extend(img_ids.numpy())

print(f"Predicciones generadas: {len(all_predictions)} imágenes")

## Generación del archivo Submission

Creo el archivo `submission.csv` con las predicciones finales para subir a Kaggle.

In [None]:
# Build the submission table (one row per test image, ordered by id) and save it
submission_df = (
    pd.DataFrame({'id': all_ids, 'label': all_predictions})
    .sort_values('id')
    .reset_index(drop=True)
)
submission_df.to_csv('submission.csv', index=False)

# Quick sanity report on the predicted probabilities
label_column = submission_df['label']
print("Archivo submission.csv generado correctamente")
print(f"\nPrimeras predicciones:")
print(submission_df.head(10))
print(f"\nEstadísticas de las predicciones:")
print(f"   - Media: {label_column.mean():.4f}")
print(f"   - Desviación estándar: {label_column.std():.4f}")
print(f"   - Mínimo: {label_column.min():.4f}")
print(f"   - Máximo: {label_column.max():.4f}")
print(f"\nTotal de predicciones: {len(submission_df)}")

## Visualización de Predicciones (mi bonus)

Muestro algunas imágenes del test set con sus predicciones para verificar visualmente.

In [None]:
# Show random test images together with their predicted class
def visualize_predictions(num_images=9):
    """Plot randomly chosen test images titled with their prediction.

    Reads the module-level ``submission_df`` (columns ``id`` and ``label``)
    and loads ``<id>.jpg`` from ``CONFIG['test_dir']``. A label above 0.5 is
    shown as "Perro", otherwise "Gato", with the matching confidence.

    Args:
        num_images: how many images to show. Values larger than the number of
            predictions are clamped; nothing is drawn when no image can be
            shown. The grid grows with this value (9 images give 3x3).
    """
    num_images = min(num_images, len(submission_df))
    if num_images <= 0:
        return

    random_indices = np.random.choice(len(submission_df), size=num_images, replace=False)

    # Near-square grid sized from num_images instead of a fixed 3x3
    n_cols = int(np.ceil(np.sqrt(num_images)))
    n_rows = int(np.ceil(num_images / n_cols))
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(5 * n_cols, 5 * n_rows), squeeze=False)
    axes = axes.ravel()

    # Hide every axis up front so unused grid cells stay blank
    for ax in axes:
        ax.axis('off')

    for ax, idx in zip(axes, random_indices):
        # Read each column separately: selecting a whole row mixes the int id
        # with the float label and upcasts the id to float ("123.0.jpg").
        img_id = int(submission_df['id'].iloc[idx])
        prediction = float(submission_df['label'].iloc[idx])

        img_path = os.path.join(CONFIG['test_dir'], f"{img_id}.jpg")

        predicted_class = "Perro" if prediction > 0.5 else "Gato"
        confidence = prediction if prediction > 0.5 else 1 - prediction

        # Close the image file as soon as it has been drawn
        with Image.open(img_path) as img:
            ax.imshow(img)
        ax.set_title(f"ID: {img_id}\n{predicted_class} ({confidence*100:.1f}%)",
                     fontsize=12, fontweight='bold')

    plt.tight_layout()
    plt.show()

# Visual sanity check: display nine randomly chosen test predictions
visualize_predictions(num_images=9)