# 🌽 Entrenamiento MobileNetV3 - ARQUITECTURA 100/100

**Objetivo: >85% Accuracy + >80% Recall**

## 🎯 Optimizaciones Clave:
1. ✅ Arquitectura 384→192 (más capacidad que 256→128)
2. ✅ Batch size 32 (mejor generalización)
3. ✅ 60 épocas iniciales (más tiempo para converger)
4. ✅ Fine-tuning ULTRA CONSERVADOR (10 capas, LR=0.000025)
5. ✅ Cosine Decay LR Schedule
6. ✅ Callbacks optimizados para recall

---

## 🔧 BLOQUE 1: Setup y Verificación

In [None]:
# Colab setup cell: mounts Google Drive, fetches the project, installs its
# dependencies and creates the output folders on Drive.
# NOTE: the `!` and `%` lines are IPython/Colab magics, so this cell only runs
# inside a notebook, and the steps depend on running in this order.

# 1.1 Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# 1.2 Clone the repository (main branch) and move into the training folder
!git clone -b main https://github.com/ojgonzalezz/corn-diseases-detection.git
%cd corn-diseases-detection/entrenamiento_modelos

# 1.3 Install dependencies
!pip install -q -r requirements.txt

# 1.4 Create the required directories on Drive (models, logs, mlruns)
!mkdir -p /content/drive/MyDrive/corn-diseases-detection/models
!mkdir -p /content/drive/MyDrive/corn-diseases-detection/logs
!mkdir -p /content/drive/MyDrive/corn-diseases-detection/mlruns

print("\n‚úÖ Setup completado!")

## 🏗️ BLOQUE 2: Configuración OPTIMIZADA y Modelo

In [None]:
import os
import time
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV3Large
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.optimizers.schedules import CosineDecay
from sklearn.utils.class_weight import compute_class_weight

# Importar configuraci√≥n base
from config import *
from utils import setup_gpu

# ==================== OPTIMIZED CONFIGURATION ====================
# Hyperparameter overrides for the "100/100" architecture. They replace any
# same-named values brought in by `from config import *`.
# Author's rationale: batch 32 generalises better than 64; epochs raised from
# 40 for better convergence; more early-stopping patience.
BATCH_SIZE, EPOCHS = 32, 60
LEARNING_RATE = 0.001  # initial learning rate
EARLY_STOPPING_PATIENCE = 20

# Configure the GPU before anything allocates device memory.
setup_gpu(GPU_MEMORY_LIMIT)

# Print the effective configuration as a single banner.
config_banner = [
    f"\n{'='*60}",
    "CONFIGURACI√ìN ARQUITECTURA 100/100",
    f"{'='*60}",
    f"Batch Size: {BATCH_SIZE}",
    f"√âpocas Iniciales: {EPOCHS}",
    f"Learning Rate: {LEARNING_RATE}",
    f"Early Stopping Patience: {EARLY_STOPPING_PATIENCE}",
    f"{'='*60}\n",
]
print("\n".join(config_banner))

In [None]:
# Build the train / validation / test data generators.
from tensorflow.keras.preprocessing.image import ImageDataGenerator

print("Creando generadores de datos...\n")

# Rescale only (per the original author's note, augmentation was already
# applied during preprocessing). VAL_SPLIT + TEST_SPLIT of each class is held
# out from training.
holdout_fraction = VAL_SPLIT + TEST_SPLIT
train_datagen = ImageDataGenerator(rescale=1./255, validation_split=holdout_fraction)
val_datagen = ImageDataGenerator(rescale=1./255, validation_split=holdout_fraction)

# Options shared by every generator so the three calls cannot drift apart.
flow_options = dict(
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    seed=RANDOM_SEED,
)

train_gen = train_datagen.flow_from_directory(
    DATA_DIR, subset='training', shuffle=True, **flow_options
)

# Evaluation generators are not shuffled, so predictions stay aligned with
# the generator's `classes` array.
val_gen = val_datagen.flow_from_directory(
    DATA_DIR, subset='validation', shuffle=False, **flow_options
)

# NOTE(review): test_gen is created with exactly the same arguments as val_gen
# (same generator, same subset='validation'), so the "test" metrics reported
# later are computed on the very data used for early stopping and checkpoint
# selection. A disjoint test split is needed for an unbiased estimate;
# flow_from_directory alone cannot carve a third subset.
test_gen = val_datagen.flow_from_directory(
    DATA_DIR, subset='validation', shuffle=False, **flow_options
)

# len(generator) is the number of batches Keras actually iterates per epoch
# (it includes the final partial batch, unlike samples // BATCH_SIZE).
print(f"üìä Dataset:")
print(f"  Training:   {train_gen.samples} im√°genes ({len(train_gen)} batches)")
print(f"  Validation: {val_gen.samples} im√°genes ({len(val_gen)} batches)")
print(f"  Test:       {test_gen.samples} im√°genes ({len(test_gen)} batches)")

# Compute balanced class weights (used during training to favour recall on
# under-represented classes).
present_classes = np.unique(train_gen.classes)
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=present_classes,
    y=train_gen.classes
)
# Key every weight by its real class index instead of its position in the
# array: the two only coincide when the labels are exactly 0..k-1 with no
# class missing from the training subset. Plain int/float keep the dict
# readable when printed.
class_weight_dict = {
    int(label): float(weight)
    for label, weight in zip(present_classes, class_weights)
}
print(f"\n‚öñÔ∏è Class weights: {class_weight_dict}")

In [None]:
# Build the model with the "100/100" architecture
def create_ultimate_model(num_classes, image_size, initial_learning_rate, steps_per_epoch, epochs=60):
    """Build and compile a MobileNetV3-Large classifier with a frozen backbone.

    The ImageNet-pretrained backbone is frozen and called in inference mode.
    On top of it: global average pooling, Dense(384) + Dropout(0.4),
    Dense(192) + Dropout(0.35) (both Dense layers use L2 regularisation of
    0.001) and a softmax output layer.

    Args:
        num_classes: Number of output classes (size of the softmax layer).
        image_size: Two-element spatial size; the network input shape is
            ``(*image_size, 3)``.
        initial_learning_rate: Starting learning rate of the cosine schedule.
        steps_per_epoch: Optimizer steps taken per epoch; used to size the
            learning-rate schedule.
        epochs: Number of epochs the cosine decay should span. Defaults to
            60, the value that used to be hard-coded.

    Returns:
        A compiled Keras ``Model`` (Adam optimizer with cosine-decayed
        learning rate, categorical cross-entropy loss, accuracy metric).
    """
    # Pretrained backbone without its classification head
    base_model = MobileNetV3Large(
        input_shape=(*image_size, 3),
        include_top=False,
        weights='imagenet'
    )

    # Freeze the backbone for the initial training phase
    base_model.trainable = False

    inputs = tf.keras.Input(shape=(*image_size, 3))
    # training=False keeps the backbone in inference mode when it is called
    x = base_model(inputs, training=False)
    x = GlobalAveragePooling2D()(x)

    # Classification head: 384 -> 192 -> num_classes
    x = Dense(384, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)
    x = Dropout(0.4)(x)

    x = Dense(192, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)
    x = Dropout(0.35)(x)

    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs, outputs)

    # Cosine decay over the whole run. decay_steps must be positive, so guard
    # against steps_per_epoch == 0 (dataset smaller than one batch).
    lr_schedule = CosineDecay(
        initial_learning_rate=initial_learning_rate,
        decay_steps=max(1, steps_per_epoch * epochs),
        alpha=0.1  # final LR = 10% of the initial LR
    )

    model.compile(
        optimizer=Adam(learning_rate=lr_schedule),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    return model

# Create the model
print("\nüèóÔ∏è Creando modelo con arquitectura 100/100...\n")
# One optimizer step is taken per batch and the generator yields
# len(train_gen) batches per epoch (including the final partial batch), so
# this keeps the schedule length in sync with what fit() really runs.
steps_per_epoch = len(train_gen)

model = create_ultimate_model(
    num_classes=NUM_CLASSES,
    image_size=IMAGE_SIZE,
    initial_learning_rate=LEARNING_RATE,
    steps_per_epoch=steps_per_epoch,
    epochs=EPOCHS
)

print(f"üìê Total par√°metros: {model.count_params():,}")
trainable_params = sum(int(tf.size(w).numpy()) for w in model.trainable_weights)
print(f"üìê Par√°metros entrenables: {trainable_params:,}")
print(f"üìê Ratio datos/params: {train_gen.samples / trainable_params:.2f}")
print("\n‚úÖ Modelo creado con arquitectura 100/100!")

## 🚀 BLOQUE 3: Entrenamiento Inicial (60 épocas)

In [None]:
# Callbacks for the initial training phase.
# ReduceLROnPlateau is deliberately NOT used here: the optimizer compiled in
# create_ultimate_model is driven by a CosineDecay schedule, and Keras raises
# an error when a callback tries to overwrite a schedule-based learning rate
# (which would happen on the first validation-loss plateau). The cosine
# schedule already handles the decay for this phase.
callbacks = [
    EarlyStopping(
        monitor='val_accuracy',
        patience=EARLY_STOPPING_PATIENCE,
        restore_best_weights=True,
        verbose=1,
        mode='max'
    ),
    ModelCheckpoint(
        str(MODELS_DIR / 'mobilenetv3_best.keras'),
        monitor='val_accuracy',
        save_best_only=True,
        verbose=1,
        mode='max'
    )
]

print(f"\n{'='*60}")
print("üöÄ INICIANDO ENTRENAMIENTO INICIAL (60 √âPOCAS)")
print(f"{'='*60}\n")
print("Objetivo: >85% accuracy, >80% recall")
print("Arquitectura: 384‚Üí192 (100/100)")
print(f"Batch size: {BATCH_SIZE}")
print(f"Learning rate: Cosine Decay desde {LEARNING_RATE}")
print(f"{'='*60}\n")

# start_time is also read by later cells to compute elapsed times.
start_time = time.time()

history = model.fit(
    train_gen,
    epochs=EPOCHS,
    validation_data=val_gen,
    callbacks=callbacks,
    class_weight=class_weight_dict,
    verbose=1
)

training_time = time.time() - start_time
# Best validation accuracy and the (1-based) epoch where it was reached.
best_val_acc = max(history.history['val_accuracy'])
best_epoch = history.history['val_accuracy'].index(best_val_acc) + 1

print(f"\n{'='*60}")
print("‚úÖ ENTRENAMIENTO INICIAL COMPLETADO")
print(f"{'='*60}")
print(f"‚è±Ô∏è  Tiempo: {training_time/60:.2f} minutos")
print(f"üìä Mejor Val Accuracy: {best_val_acc:.4f} ({best_val_acc*100:.2f}%) en √©poca {best_epoch}")
print(f"üìä Train Accuracy final: {history.history['accuracy'][-1]:.4f}")
print(f"{'='*60}\n")

## 🎯 BLOQUE 4: Fine-tuning ULTRA CONSERVADOR (10 épocas)

**IMPORTANTE:** Solo ejecuta este bloque si:
- Val Accuracy < 85% después del entrenamiento inicial
- El modelo muestra potencial de mejora (no hay overfitting severo)

Si ya tienes >85% accuracy, **SALTA este bloque** y ve directo al Bloque 5.

In [None]:
# Decide whether fine-tuning is worth running: skip it when the initial
# training already reached the 85% validation-accuracy target.
if best_val_acc >= 0.85:
    print(f"\n{'='*60}")
    print("üéâ ¬°OBJETIVO ALCANZADO SIN FINE-TUNING!")
    print(f"{'='*60}")
    print(f"Val Accuracy: {best_val_acc:.4f} (>85%)")
    print("No es necesario ejecutar fine-tuning.")
    print("Ve directo al BLOQUE 5 para evaluaci√≥n.")
    print(f"{'='*60}\n")
else:
    print(f"\n{'='*60}")
    print("üéØ INICIANDO FINE-TUNING ULTRA CONSERVADOR")
    print(f"{'='*60}")
    print(f"Val Accuracy actual: {best_val_acc:.4f} (<85%)")
    print("Descongelando SOLO 10 capas finales...")
    print(f"{'='*60}\n")

    # Unfreeze ONLY the last 10 layers of the backbone. model.layers[1] is
    # the MobileNetV3 sub-model (layers[0] is the Input layer).
    base_model = model.layers[1]
    base_model.trainable = True

    for layer in base_model.layers[:-10]:
        layer.trainable = False

    trainable_layers = sum(1 for layer in base_model.layers if layer.trainable)
    print(f"üîì Capas descongeladas: {trainable_layers} de {len(base_model.layers)}\n")

    # Recompile with a very low, constant LR: 0.000025 (2.5% of the initial
    # LR). A constant LR (no schedule) lets ReduceLROnPlateau adjust it.
    model.compile(
        optimizer=Adam(learning_rate=LEARNING_RATE * 0.025),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    # Safety net: snapshot the weights and their validation accuracy right
    # before fine-tuning (trainable flags are already final here), so the
    # model can be rolled back if fine-tuning leaves it worse off.
    pre_finetune_weights = model.get_weights()
    _, pre_finetune_val_acc = model.evaluate(val_gen, verbose=0)

    finetune_callbacks = [
        EarlyStopping(
            monitor='val_accuracy',
            patience=5,  # stop quickly if it gets worse
            restore_best_weights=True,
            verbose=1,
            mode='max'
        ),
        ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.5,
            patience=3,
            min_lr=1e-7,
            verbose=1,
            mode='min'
        ),
        ModelCheckpoint(
            str(MODELS_DIR / 'mobilenetv3_best.keras'),
            monitor='val_accuracy',
            save_best_only=True,
            verbose=1,
            mode='max',
            # Same file as the initial phase: only overwrite it when
            # fine-tuning actually beats the best accuracy seen so far.
            initial_value_threshold=best_val_acc
        )
    ]

    # Fine-tune for at most 10 epochs, timed on its own clock so idle time
    # between notebook cells is not counted.
    finetune_start = time.time()
    history_finetune = model.fit(
        train_gen,
        epochs=10,
        validation_data=val_gen,
        callbacks=finetune_callbacks,
        class_weight=class_weight_dict,
        verbose=1
    )
    finetune_time = time.time() - finetune_start
    total_time = training_time + finetune_time

    # Append the fine-tuning curves to the initial history; tolerate metrics
    # that were only logged in one of the two phases.
    for key, values in history.history.items():
        values.extend(history_finetune.history.get(key, []))

    best_val_acc_finetune = max(history_finetune.history['val_accuracy'])

    # Roll back when the weights now in memory validate worse than the ones
    # we started from, so later evaluation/saving never uses a degraded model.
    _, post_finetune_val_acc = model.evaluate(val_gen, verbose=0)
    rolled_back = post_finetune_val_acc < pre_finetune_val_acc
    if rolled_back:
        model.set_weights(pre_finetune_weights)

    print(f"\n{'='*60}")
    print("‚úÖ FINE-TUNING COMPLETADO")
    print(f"{'='*60}")
    print(f"‚è±Ô∏è  Tiempo fine-tuning: {finetune_time/60:.2f} minutos")
    print(f"‚è±Ô∏è  Tiempo total: {total_time/60:.2f} minutos")
    print(f"üìä Mejor Val Accuracy (fine-tuning): {best_val_acc_finetune:.4f}")

    if best_val_acc_finetune > best_val_acc:
        print(f"üìà ¬°Mejora lograda! {best_val_acc:.4f} ‚Üí {best_val_acc_finetune:.4f}")
    else:
        print(f"üìâ No hubo mejora. Mejor resultado sigue siendo: {best_val_acc:.4f}")

    if rolled_back:
        print("Se restauraron los pesos previos al fine-tuning (el modelo en memoria empeoraba).")

    print(f"{'='*60}\n")

## 📊 BLOQUE 5: Evaluación Completa y Guardado

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report
import json
from datetime import datetime
from utils import evaluate_model, plot_training_history, plot_confusion_matrix, save_training_log

# ---- Evaluate on the test generator and report the results ----
print("\n".join([
    f"\n{'='*60}",
    "üìä EVALUACI√ìN EN TEST SET",
    f"{'='*60}\n",
]))

# evaluate_model is a project helper; this cell reads 'test_accuracy',
# 'test_loss' and 'classification_report' from what it returns.
evaluation_results = evaluate_model(model, test_gen, CLASSES)

test_acc, test_loss = (
    evaluation_results['test_accuracy'],
    evaluation_results['test_loss'],
)

print(f"\n{'='*60}")
print("üìà RESULTADOS FINALES")
print(f"{'='*60}")
print(f"Test Accuracy: {test_acc:.4f} ({test_acc*100:.2f}%)")
print(f"Test Loss:     {test_loss:.4f}")

# Accuracy target check (85%).
accuracy_message = (
    f"\nüéâ ¬°OBJETIVO DE ACCURACY ALCANZADO! (>85%)"
    if test_acc >= 0.85
    else f"\n‚ö†Ô∏è  Accuracy por debajo del objetivo: {test_acc:.4f} < 0.85"
)
print(accuracy_message)

print(f"\n{'='*60}")
print("üìã M√âTRICAS POR CLASE")
print(f"{'='*60}")

# Per-class precision / recall / F1, flagging each class against the 80%
# recall target.
per_class_report = evaluation_results['classification_report']
for class_name in CLASSES:
    metrics = per_class_report[class_name]
    recall, precision, f1 = metrics['recall'], metrics['precision'], metrics['f1-score']

    status = "‚úÖ" if recall >= 0.80 else "‚ùå"

    print(f"\n{status} {class_name}:")
    print(f"  Precision: {precision:.4f} ({precision*100:.2f}%)")
    print(f"  Recall:    {recall:.4f} ({recall*100:.2f}%)")
    print(f"  F1-Score:  {f1:.4f} ({f1*100:.2f}%)")

# The recall target is met only if no class falls below 0.80.
recall_objetivo_alcanzado = not any(
    per_class_report[name]['recall'] < 0.80 for name in CLASSES
)

if not recall_objetivo_alcanzado:
    print(f"\n‚ö†Ô∏è  Algunas clases tienen recall < 80%")
else:
    print(f"\nüéâ ¬°OBJETIVO DE RECALL ALCANZADO EN TODAS LAS CLASES! (>80%)")

print(f"\n{'='*60}\n")

In [None]:
# ---- Persist plots, the final model and the training log ----
print("üíæ Guardando resultados...\n")

# Output locations, resolved up front.
plot_path = LOGS_DIR / 'mobilenetv3_ultimate_training_history.png'
cm_path = LOGS_DIR / 'mobilenetv3_ultimate_confusion_matrix.png'
model_path = MODELS_DIR / 'mobilenetv3_ultimate_final.keras'
log_path = LOGS_DIR / 'mobilenetv3_ultimate_training_log.json'

# 1. Training-history plot
plot_training_history(history, plot_path)
print(f"‚úÖ Gr√°fico guardado: {plot_path}")

# 2. Confusion matrix (the value returned by the helper is passed to the log below)
cm = plot_confusion_matrix(
    evaluation_results['y_true'],
    evaluation_results['y_pred'],
    CLASSES,
    cm_path,
)
print(f"‚úÖ Matriz de confusi√≥n guardada: {cm_path}")

# 3. Final model
model.save(str(model_path))
print(f"‚úÖ Modelo final guardado: {model_path}")

# 4. Detailed log: hyperparameters recorded alongside the results
finetune_lr = LEARNING_RATE * 0.025
hyperparameters = {
    'model_name': 'MobileNetV3-Large ULTIMATE',
    'architecture': 'Dense(384)->Dense(192) [100/100]',
    'image_size': IMAGE_SIZE,
    'batch_size': BATCH_SIZE,
    'epochs_initial': EPOCHS,
    'learning_rate': LEARNING_RATE,
    'lr_schedule': 'CosineDecay',
    'optimizer': 'Adam',
    'dropout': [0.4, 0.35],
    'l2_regularization': 0.001,
    'fine_tuning_layers': 10,
    'fine_tuning_lr': finetune_lr,
    'fine_tuning_epochs': 10,
}

# Wall-clock time elapsed since the initial training started.
total_time = time.time() - start_time

save_training_log(
    log_path,
    'MobileNetV3-Large ULTIMATE',
    hyperparameters,
    history,
    evaluation_results,
    cm,
    total_time,
)
print(f"‚úÖ Log guardado: {log_path}")

# 5. Final summary
summary_lines = [
    f"\n{'='*60}",
    "üéâ ¬°ENTRENAMIENTO COMPLETADO EXITOSAMENTE!",
    f"{'='*60}",
    f"‚è±Ô∏è  Tiempo total: {total_time/60:.2f} minutos",
    f"üìä Test Accuracy: {test_acc:.4f} ({test_acc*100:.2f}%)",
    f"üìä Objetivo Accuracy (>85%): {'‚úÖ ALCANZADO' if test_acc >= 0.85 else '‚ùå NO ALCANZADO'}",
    f"üìä Objetivo Recall (>80%): {'‚úÖ ALCANZADO' if recall_objetivo_alcanzado else '‚ùå NO ALCANZADO'}",
    f"\nüíæ Todos los archivos guardados en:",
    f"   ‚Ä¢ Modelo: {model_path}",
    f"   ‚Ä¢ Logs: {LOGS_DIR}",
    f"{'='*60}\n",
]
print("\n".join(summary_lines))