# Football Logo Classification - Google Colab (OPTIMIZED)
## Clasificación de logos de equipos de fútbol por liga europea

**Dataset:** 579 logos de 24 ligas europeas (limpiado)

**Modelos:** CustomCNN (baseline) vs ResNet18 (transfer learning con freeze)

**Mejoras aplicadas:**
- Dataset limpio (eliminadas 2 ligas con < 15 imágenes)
- Augmentation agresivo (rotation, zoom, color, perspective) SIN flip horizontal
- Learning rates reducidos un 50%
- Más epochs para convergencia gradual
- Weight decay (L2 regularization)
- Dropout aumentado a 0.65
- Batch size reducido a 16
- ResNet18 con freeze de capas tempranas

## Setup: Montar Drive e instalar dependencias

In [None]:
# Colab setup cell: mount Drive, move into the project folder, install the
# dependencies and print a quick sanity check of the environment.
# NOTE: lines starting with `!` are IPython shell escapes; this cell only runs
# inside a notebook (Colab/Jupyter), not as a plain Python script.

# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Change into the project folder (adjust the path if your folder lives elsewhere)
import os
os.chdir('/content/drive/MyDrive/FLIC')

# Install dependencies
!pip install -q torch torchvision matplotlib seaborn scikit-learn Pillow

# Verify the project layout
print("✓ Drive montado")
print("✓ Working directory:", os.getcwd())
print("\n✓ Contenido:")
!ls -la

# The later cells import from the src/ package, so list what it contains
print("\n✓ Módulos src/:")
!ls src/

# Count the entries under data/ and show the first five of them
print("\n✓ Ligas en data/:")
!ls data/ | wc -l
!ls data/ | head -5

# Report the PyTorch version and whether a CUDA GPU is visible
import torch
print("\n✓ PyTorch version:", torch.__version__)
print("✓ GPU disponible:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("✓ GPU detectada:", torch.cuda.get_device_name(0))

## Imports y configuración

In [None]:
import sys
sys.path.append('.')

import torch
import matplotlib.pyplot as plt

from src.dataset import get_dataloaders
from src.models import CustomCNN, get_resnet18
from src.train import train_model
from src.evaluate import evaluate_model, plot_confusion_matrix, plot_training_history
from src.utils import predict_from_dataset, visualize_prediction_from_dataset, visualize_dataset_samples

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")
print(f"Using device: {DEVICE}")

## Cargar datos

In [None]:
# Dataset location and loader batch size.
DATA_DIR = 'data'
BATCH_SIZE = 16  # Reduced for better gradient updates

# Build the train/val/test loaders plus the list of class (league) names.
# val_split / test_split are presumably fractions of the dataset — confirm in
# src/dataset.py.
loaders_and_classes = get_dataloaders(
    DATA_DIR, batch_size=BATCH_SIZE, val_split=0.15, test_split=0.15
)
train_loader, val_loader, test_loader, class_names = loaders_and_classes

# Summarise the number of classes and the size of each split.
print(f"Number of classes (leagues): {len(class_names)}")
print(f"Training samples: {len(train_loader.dataset)}")
print(f"Validation samples: {len(val_loader.dataset)}")
print(f"Test samples: {len(test_loader.dataset)}")

## Explorar ligas

In [None]:
# Print a numbered (1-based, right-aligned) list of every league name.
print("Leagues in dataset:")
for i, league in enumerate(class_names, start=1):
    print(f"{i:2d}. {league}")

## Visualizar muestras con augmentation

In [None]:
# Show a grid of 16 sample images.
# NOTE(review): `.dataset.dataset` presumably reaches through a Subset to the
# full underlying dataset, so samples may come from any split — confirm.
underlying_dataset = train_loader.dataset.dataset
visualize_dataset_samples(underlying_dataset, class_names, n_samples=16)

## Entrenar CustomCNN (OPTIMIZADO)

In [None]:
# ====================================================================
# CUSTOM CNN - OPTIMIZED CONFIGURATIONS
# ====================================================================
# Changes made to fight severe overfitting (train 95%, val 20%):
# - Learning rates halved (0.0005, 0.0001, 0.00005)
# - More epochs (+10)
# - Weight decay (L2 regularization)
# - Dropout 0.65
# - Aggressive augmentation
# - Batch size 16
# (Dropout and augmentation are not set in this cell; per the original notes
# they are configured in the src/ modules.)

# One entry per training run: label, epoch count, learning rate, weight decay.
cnn_configs = [
    {'name': 'Agresiva', 'epochs': 25, 'lr': 0.0005, 'weight_decay': 1e-4},
    {'name': 'Balanceada', 'epochs': 30, 'lr': 0.0001, 'weight_decay': 1e-4},
    {'name': 'Conservadora', 'epochs': 35, 'lr': 0.00005, 'weight_decay': 1e-3}
]

# Filled with one result dict per config: the config itself, the trained
# model, its training history and the final-epoch summary metrics.
cnn_results = []

print("="*70)
print("CUSTOM CNN - CONFIGURACIONES OPTIMIZADAS")
print("="*70)

for i, config in enumerate(cnn_configs, 1):
    print(f"\n{'='*70}")
    # Derive the total from the list so the banner stays right when configs
    # are added or removed.
    print(f"CONFIG {i}/{len(cnn_configs)}: {config['name']}")
    print(f"Epochs: {config['epochs']} | LR: {config['lr']} | WD: {config['weight_decay']}")
    print(f"{'='*70}")

    # Fresh, untrained network for every configuration.
    model = CustomCNN(num_classes=len(class_names))

    # `history` is indexed below as per-epoch sequences under the keys
    # 'train_acc', 'val_acc' and 'val_loss'.
    history = train_model(
        model,
        train_loader,
        val_loader,
        epochs=config['epochs'],
        lr=config['lr'],
        weight_decay=config['weight_decay'],
        device=DEVICE
    )

    # Final-epoch metrics, computed once and reused for storage and printing.
    final_val_acc = history['val_acc'][-1]
    final_val_loss = history['val_loss'][-1]
    # Train/val accuracy gap at the last epoch, used as the overfitting measure.
    overfitting = history['train_acc'][-1] - final_val_acc

    cnn_results.append({
        'config': config,
        'model': model,
        'history': history,
        'final_val_acc': final_val_acc,
        'final_val_loss': final_val_loss,
        'overfitting': overfitting
    })

    print(f"\nResultados Config {config['name']}:")
    print(f"  - Val Accuracy: {final_val_acc:.2f}%")
    print(f"  - Val Loss: {final_val_loss:.4f}")
    print(f"  - Overfitting: {overfitting:.2f}%")

print(f"\n{'='*70}")
print("CUSTOM CNN - Completado")
print(f"{'='*70}")

## Visualizar curvas CustomCNN

In [None]:
# Plot train vs. validation accuracy, one panel per trained CustomCNN config.
# The panel count follows the number of results instead of a hard-coded 3;
# at least one panel is always created so an empty result list still yields a
# figure. With three results the figure size is (18, 5), as before.
n_plots = max(len(cnn_results), 1)
# squeeze=False keeps `axes` two-dimensional even when there is a single panel.
fig, axes = plt.subplots(1, n_plots, figsize=(6 * n_plots, 5), squeeze=False)
fig.suptitle('Custom CNN - Comparación de Configuraciones (Optimizado)', fontsize=16, fontweight='bold')

for ax, result in zip(axes[0], cnn_results):
    history = result['history']
    config = result['config']

    ax.plot(history['train_acc'], label='Train Accuracy', linewidth=2)
    ax.plot(history['val_acc'], label='Val Accuracy', linewidth=2)
    ax.set_title(f"{config['name']}\n(epochs={config['epochs']}, lr={config['lr']})")
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Accuracy (%)')
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## Seleccionar mejor CustomCNN

In [None]:
def _cnn_selection_score(result):
    """Return final val accuracy minus 0.3 times the train/val accuracy gap."""
    return result['final_val_acc'] - 0.3 * result['overfitting']


# Keep the CustomCNN run with the highest penalised score.
best_cnn = max(cnn_results, key=_cnn_selection_score)

print("="*70)
print("MEJOR CONFIGURACIÓN - CUSTOM CNN")
print("="*70)
print(f"Configuración elegida: {best_cnn['config']['name']}")
print(f"  - Epochs: {best_cnn['config']['epochs']}")
print(f"  - Learning Rate: {best_cnn['config']['lr']}")
print(f"  - Val Accuracy: {best_cnn['final_val_acc']:.2f}%")
print(f"  - Val Loss: {best_cnn['final_val_loss']:.4f}")
print(f"  - Overfitting: {best_cnn['overfitting']:.2f}%")
print("="*70)

## Entrenar ResNet18 (OPTIMIZADO con FREEZE)

In [None]:
# ====================================================================
# RESNET18 - OPTIMIZED CONFIGURATIONS WITH FREEZE
# ====================================================================
# Changes made to fight overfitting:
# - Freeze the early layers (conv1, bn1, layer1, layer2)
# - Train only: layer3, layer4, fc
# - Very low learning rates (so the ImageNet weights are not destroyed)
# - More epochs
# - Strong weight decay
# (Which layers are frozen is decided inside get_resnet18; the list above is
# taken from the original notes.)

# One entry per training run: label, epoch count, learning rate, weight decay.
resnet_configs = [
    {'name': 'Estándar', 'epochs': 20, 'lr': 0.00005, 'weight_decay': 1e-4},
    {'name': 'Moderada', 'epochs': 25, 'lr': 0.00001, 'weight_decay': 1e-3},
    {'name': 'Fina', 'epochs': 30, 'lr': 0.000005, 'weight_decay': 1e-3}
]

# Filled with one result dict per config: the config itself, the trained
# model, its training history and the final-epoch summary metrics.
resnet_results = []

print("="*70)
print("RESNET18 - CONFIGURACIONES OPTIMIZADAS (FREEZE)")
print("="*70)

for i, config in enumerate(resnet_configs, 1):
    print(f"\n{'='*70}")
    # Derive the total from the list so the banner stays right when configs
    # are added or removed.
    print(f"CONFIG {i}/{len(resnet_configs)}: {config['name']}")
    print(f"Epochs: {config['epochs']} | LR: {config['lr']} | WD: {config['weight_decay']}")
    print(f"{'='*70}")

    # Fresh pretrained network with layer freezing requested for every run.
    model = get_resnet18(num_classes=len(class_names), pretrained=True, freeze_layers=True)

    # `history` is indexed below as per-epoch sequences under the keys
    # 'train_acc', 'val_acc' and 'val_loss'.
    history = train_model(
        model,
        train_loader,
        val_loader,
        epochs=config['epochs'],
        lr=config['lr'],
        weight_decay=config['weight_decay'],
        device=DEVICE
    )

    # Final-epoch metrics, computed once and reused for storage and printing.
    final_val_acc = history['val_acc'][-1]
    final_val_loss = history['val_loss'][-1]
    # Train/val accuracy gap at the last epoch, used as the overfitting measure.
    overfitting = history['train_acc'][-1] - final_val_acc

    resnet_results.append({
        'config': config,
        'model': model,
        'history': history,
        'final_val_acc': final_val_acc,
        'final_val_loss': final_val_loss,
        'overfitting': overfitting
    })

    print(f"\nResultados Config {config['name']}:")
    print(f"  - Val Accuracy: {final_val_acc:.2f}%")
    print(f"  - Val Loss: {final_val_loss:.4f}")
    print(f"  - Overfitting: {overfitting:.2f}%")

print(f"\n{'='*70}")
print("RESNET18 - Completado")
print(f"{'='*70}")

## Visualizar curvas ResNet18

In [None]:
# Plot train vs. validation accuracy, one panel per trained ResNet18 config.
# The panel count follows the number of results instead of a hard-coded 3;
# at least one panel is always created so an empty result list still yields a
# figure. With three results the figure size is (18, 5), as before.
n_plots = max(len(resnet_results), 1)
# squeeze=False keeps `axes` two-dimensional even when there is a single panel.
fig, axes = plt.subplots(1, n_plots, figsize=(6 * n_plots, 5), squeeze=False)
fig.suptitle('ResNet18 (Freeze + Transfer Learning) - Comparación', fontsize=16, fontweight='bold')

for ax, result in zip(axes[0], resnet_results):
    history = result['history']
    config = result['config']

    ax.plot(history['train_acc'], label='Train Accuracy', linewidth=2)
    ax.plot(history['val_acc'], label='Val Accuracy', linewidth=2)
    ax.set_title(f"{config['name']}\n(epochs={config['epochs']}, lr={config['lr']})")
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Accuracy (%)')
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## Seleccionar mejor ResNet18

In [None]:
def _resnet_selection_score(result):
    """Return final val accuracy minus 0.3 times the train/val accuracy gap."""
    return result['final_val_acc'] - 0.3 * result['overfitting']


# Keep the ResNet18 run with the highest penalised score.
best_resnet = max(resnet_results, key=_resnet_selection_score)

print("="*70)
print("MEJOR CONFIGURACIÓN - RESNET18")
print("="*70)
print(f"Configuración elegida: {best_resnet['config']['name']}")
print(f"  - Epochs: {best_resnet['config']['epochs']}")
print(f"  - Learning Rate: {best_resnet['config']['lr']}")
print(f"  - Val Accuracy: {best_resnet['final_val_acc']:.2f}%")
print(f"  - Val Loss: {best_resnet['final_val_loss']:.4f}")
print(f"  - Overfitting: {best_resnet['overfitting']:.2f}%")
print("="*70)

## Evaluar modelos en test set

In [None]:
# Evaluate the selected CustomCNN on the test loader.
separator = "="*70
print(separator)
print("EVALUACIÓN: MEJOR CUSTOM CNN")
print(separator)
results_cnn = evaluate_model(
    best_cnn['model'],
    test_loader,
    class_names,
    device=DEVICE,
)

In [None]:
# Evaluate the selected ResNet18 on the test loader.
separator = "="*70
print("\n" + separator)
print("EVALUACIÓN: MEJOR RESNET18")
print(separator)
results_resnet = evaluate_model(
    best_resnet['model'],
    test_loader,
    class_names,
    device=DEVICE,
)

## Comparación final

In [None]:
# Side-by-side test accuracy of the two selected models.
print("\n" + "="*70)
print("COMPARACIÓN FINAL - TEST SET")
print("="*70)
print(f"Custom CNN ({best_cnn['config']['name']}): {results_cnn['accuracy']:.2f}%")
print(f"ResNet18 ({best_resnet['config']['name']}): {results_resnet['accuracy']:.2f}%")
print(f"\nMejora con Transfer Learning: {results_resnet['accuracy'] - results_cnn['accuracy']:.2f}%")
print("="*70)

# Start from the CustomCNN as the winner; ResNet18 only replaces it when its
# test accuracy is strictly higher, so a tie keeps the CustomCNN.
winner_name = f"Custom CNN ({best_cnn['config']['name']})"
winner_model = best_cnn['model']
winner_results = results_cnn
if results_resnet['accuracy'] > results_cnn['accuracy']:
    winner_name = f"ResNet18 ({best_resnet['config']['name']})"
    winner_model = best_resnet['model']
    winner_results = results_resnet

print(f"\nMODELO GANADOR: {winner_name}")
print(f"Test Accuracy: {winner_results['accuracy']:.2f}%")

## Matriz de confusión

In [None]:
# Confusion matrix for the winning model, built from the 'labels' and
# 'predictions' entries of its evaluation results.
winner_labels = winner_results['labels']
winner_predictions = winner_results['predictions']
plot_confusion_matrix(winner_labels, winner_predictions, class_names, figsize=(14, 12))

## Predicciones de prueba

In [None]:
import random

# Run the winning model on one randomly chosen test image and print its top-5
# predicted leagues next to the true label.
test_subset = test_loader.dataset
test_dataset = test_subset.dataset

# predict_from_dataset indexes into the underlying dataset, so the index must
# be one that belongs to the test split. When the loader wraps a
# torch.utils.data.Subset (it exposes `.indices`), draw from those indices;
# drawing from range(len(test_dataset)) could pick a train/val image.
# If there is no usable `.indices`, fall back to the whole-dataset draw.
subset_indices = getattr(test_subset, 'indices', None)
if subset_indices is not None and len(subset_indices) > 0:
    # int() normalises the entry in case the indices are stored as a tensor
    # or array rather than a plain list.
    random_idx = int(subset_indices[random.randrange(len(subset_indices))])
else:
    random_idx = random.randint(0, len(test_dataset) - 1)

predictions, true_label, image = predict_from_dataset(
    test_dataset,
    winner_model,
    class_names,
    random_idx,
    device=DEVICE,
    top_k=5
)

print(f"True Label: {true_label}")
print("\nTop 5 predictions:")
# Each prediction is unpacked as a (league, probability) pair.
for i, (league, prob) in enumerate(predictions, 1):
    print(f"{i}. {league}: {prob:.2f}%")

In [None]:
# Show the sampled image with only the first three predictions.
top_three = predictions[:3]
visualize_prediction_from_dataset(image, top_three, true_label)

## Guardar mejor modelo

In [None]:
import os

# Make sure the output folder exists before writing the checkpoint.
os.makedirs('models', exist_ok=True)

# Save only the winner's state_dict, not the whole module object.
MODEL_PATH = 'models/best_model_optimized.pth'
winner_state = winner_model.state_dict()
torch.save(winner_state, MODEL_PATH)

# Summary of what was written and how the saved model scored.
separator = "="*70
print(separator)
print("MODELO GUARDADO")
print(separator)
print(f"Modelo: {winner_name}")
print(f"Path: {MODEL_PATH}")
print(f"Test Accuracy: {winner_results['accuracy']:.2f}%")
print(separator)