# Overfitting y Underfitting

## Objetivos

- Comprender qué es el **overfitting** (sobreajuste) y el **underfitting** (subajuste)
- Identificar visualmente estos fenómenos en curvas de entrenamiento
- Aplicar técnicas para prevenir overfitting en PyTorch

## Importar librerías

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.datasets import make_friedman3
from tqdm import tqdm

# Report the installed PyTorch version and whether a CUDA device is usable.
print(
    f"PyTorch version: {torch.__version__}",
    f"CUDA available: {torch.cuda.is_available()}",
    sep="\n",
)

## Cargar y preparar datos

Usaremos un dataset sintético de regresión (Friedman #3, generado con `make_friedman3`) y **crearemos un escenario propenso al overfitting**:
- Entrenaremos con solo el 20% de los datos (dataset pequeño → fácil overfitting)
- Usaremos modelos complejos
- Sin regularización inicialmente

In [None]:
def prepare_data(n_samples, train_ratio, batch_size, seed=42):
    """Build train/validation loaders from a synthetic Friedman #3 dataset.

    The features are min-max scaled with the nominal sampling bounds of
    ``make_friedman3``; the target is left unscaled. Tensors are placed on
    the GPU when CUDA is available, otherwise on the CPU.

    Args:
        n_samples: Total number of samples to generate.
        train_ratio: Fraction of the samples assigned to the training set;
            the remaining rows form the validation set.
        batch_size: Batch size used by both loaders.
        seed: Random seed for the data generation and for the
            train/validation split.

    Returns:
        A ``(train_loader, val_loader)`` tuple. The training loader shuffles
        its batches; the validation loader does not.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"

    X, y = make_friedman3(n_samples=n_samples, noise=40, random_state=seed)

    # Nominal feature intervals of Friedman #3:
    # [0, 100], [40*pi, 560*pi], [0, 1], [1, 11].
    X_lb = np.array([0., 40. * np.pi, 0., 1.])
    X_ub = np.array([100., 560. * np.pi, 1., 11.])
    X_scaled = (X - X_lb) / (X_ub - X_lb)

    # One DataFrame column per feature plus the target, so that the split
    # can be done by row index.
    columns = {f"X_{i}": X_scaled[:, i] for i in range(X_scaled.shape[-1])}
    columns["y"] = y
    dataset = pd.DataFrame(columns)

    # Use the caller's seed for the split so the whole pipeline is driven by it.
    X_train = dataset.sample(frac=train_ratio, random_state=seed)
    X_val = dataset.drop(X_train.index)

    # Separate features and target.
    y_train = X_train.pop('y')
    y_val = X_val.pop('y')

    def _to_tensor(values, as_column=False):
        """Convert an array to a float32 tensor on ``device``."""
        tensor = torch.tensor(values, dtype=torch.float32)
        if as_column:
            tensor = tensor.reshape(-1, 1)
        return tensor.to(device)

    X_train_tensor = _to_tensor(X_train.values)
    y_train_tensor = _to_tensor(y_train.values, as_column=True)
    X_val_tensor = _to_tensor(X_val.values)
    y_val_tensor = _to_tensor(y_val.values, as_column=True)

    # Data loaders: only the training batches are shuffled.
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    print(f"Train set: {X_train.shape[0]} muestras")
    print(f"Val set: {X_val.shape[0]} muestras")

    return train_loader, val_loader

In [None]:
# Experiment configuration: dataset size, fraction used for training,
# and mini-batch size.
NSAMPLES = 1000
TRAIN_RATIO = 0.2
BATCH_SIZE = 50

train_loader, val_loader = prepare_data(
    n_samples=NSAMPLES,
    train_ratio=TRAIN_RATIO,
    batch_size=BATCH_SIZE,
)

## Demostración 1: Underfitting vs Overfitting (sin regularización)

Entrenaremos dos modelos:
1. **Modelo pequeño** (entrada → 8 → 1): Probablemente underfitting
2. **Modelo grande** (entrada → 256 → 256 → 1): Probablemente overfitting

In [None]:
class SmallModel(nn.Module):
    """Small network used to observe underfitting.

    A single hidden layer of 8 ReLU units followed by a linear layer with
    one output.
    """

    def __init__(self, input_size):
        super().__init__()
        hidden_units = 8
        # Layers are created in forward order so parameter names stay
        # ``net.0.*`` and ``net.2.*``.
        layers = [
            nn.Linear(input_size, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, 1),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the network output for the batch ``x``."""
        output = self.net(x)
        return output

class LargeModel(nn.Module):
    """Large network used to provoke overfitting.

    Two hidden layers of 256 ReLU units each, followed by a linear layer
    with one output.
    """

    def __init__(self, input_size):
        super().__init__()
        hidden_units = 256
        # Layers are created in forward order so parameter names stay
        # ``net.0.*``, ``net.2.*`` and ``net.4.*``.
        layers = [
            nn.Linear(input_size, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, 1),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the network output for the batch ``x``."""
        output = self.net(x)
        return output

# Build both models on the same device as the training tensors; the input
# width is read from the feature tensor's second dimension.
device = train_loader.dataset.tensors[0].device
input_size = train_loader.dataset.tensors[0].size(1)
small = SmallModel(input_size).to(device)
large = LargeModel(input_size).to(device)

# Report the number of parameters of each network.
print(
    f"Par√°metros modelo peque√±o: {sum(p.numel() for p in small.parameters())}",
    f"Par√°metros modelo grande: {sum(p.numel() for p in large.parameters())}",
    sep="\n",
)

In [None]:
def train_model(model, train_loader, val_loader, epochs=200, lr=0.01, early_stopping=False, patience=20, min_delta=0.001):
    """Train ``model`` with SGD on an MSE objective and record per-epoch metrics.

    Args:
        model: Network to train; updated in place.
        train_loader: Iterable of ``(X_batch, y_batch)`` training batches.
        val_loader: Iterable of ``(X_batch, y_batch)`` validation batches.
        epochs: Maximum number of epochs.
        lr: Learning rate passed to ``optim.SGD``.
        early_stopping: If True, stop once the validation loss has not
            improved for ``patience`` consecutive epochs.
        patience: Number of epochs without improvement tolerated before
            stopping.
        min_delta: Minimum decrease of the validation loss that counts as
            an improvement.

    Returns:
        A dict with the keys ``'train_loss'``, ``'val_loss'``,
        ``'train_mae'`` and ``'val_mae'``. Each value is a list of Python
        floats with one entry per completed epoch, computed as the mean of
        the per-batch values.
    """
    loss_fcn = nn.MSELoss()
    optimizer = optim.SGD(model.parameters(), lr=lr)

    history = {'train_loss': [], 'val_loss': [], 'train_mae': [], 'val_mae': []}
    best_val_loss = float('inf')
    patience_counter = 0

    for epoch in tqdm(range(epochs), desc="Training loop"):
        # Training pass
        model.train()
        train_loss = 0.0
        train_mae = 0.0
        for X_batch, y_batch in train_loader:
            y_pred = model(X_batch)
            loss = loss_fcn(y_pred, y_batch)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            # detach(): the MAE is only a reported metric, not part of the graph.
            train_mae += (y_pred.detach() - y_batch).abs().mean().item()
        train_loss /= len(train_loader)
        train_mae /= len(train_loader)

        # Validation pass
        model.eval()
        val_loss = 0.0
        val_mae = 0.0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                y_pred_val = model(X_batch)
                # .item() keeps the history as plain floats, so it can be
                # plotted and compared regardless of the tensors' device.
                val_loss += loss_fcn(y_pred_val, y_batch).item()
                val_mae += (y_pred_val - y_batch).abs().mean().item()
        val_loss /= len(val_loader)
        val_mae /= len(val_loader)

        history['train_loss'].append(train_loss)
        history['val_loss'].append(val_loss)
        history['train_mae'].append(train_mae)
        history['val_mae'].append(val_mae)

        # Early stopping
        if early_stopping:
            if val_loss < best_val_loss - min_delta:
                best_val_loss = val_loss
                patience_counter = 0
            else:
                patience_counter += 1
                if patience_counter >= patience:
                    print(f"Early stopping en √©poca {epoch}")
                    break

    return history

In [None]:
# Training hyperparameters shared by the runs in this notebook.
EPOCHS = 400  # number of training epochs
LR = 1e-2     # learning rate

In [None]:
# Train a fresh small model (moderate underfitting expected).
small = SmallModel(input_size).to(device)
history_small = train_model(
    small,
    train_loader,
    val_loader,
    epochs=EPOCHS,
    lr=LR,
    early_stopping=False,
)

print(
    f"\nTrain loss final: {history_small['train_loss'][-1]:.4f}",
    f"Val loss final: {history_small['val_loss'][-1]:.4f}",
    sep="\n",
)

# Train a fresh large model (overfitting expected).
large = LargeModel(input_size).to(device)
history_large = train_model(
    large,
    train_loader,
    val_loader,
    epochs=EPOCHS,
    lr=LR,
    early_stopping=False,
)

print(
    f"\nTrain loss final: {history_large['train_loss'][-1]:.4f}",
    f"Val loss final: {history_large['val_loss'][-1]:.4f}",
    sep="\n",
)

In [None]:
# Visualizar underfitting vs overfitting
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Modelo peque√±o (underfitting)
axes[0].plot(history_small['train_loss'], label='Train Loss', linewidth=2, color="c")
axes[0].plot(history_small['val_loss'], label='Val Loss', linewidth=2, color="r")
axes[0].set_xlabel('√âpoca', fontsize=12)
axes[0].set_ylabel('MSE Loss', fontsize=12)
axes[0].set_title('Modelo Peque√±o (UNDERFITTING)\nAmbas p√©rdidas altas y similares', fontsize=13, fontweight='bold')
axes[0].legend(fontsize=11)
axes[0].grid(True, alpha=0.3)

# Modelo grande (overfitting)
axes[1].plot(history_large['train_loss'], label='Train Loss', linewidth=2, color="c")
axes[1].plot(history_large['val_loss'], label='Val Loss', linewidth=2, color="r")
axes[1].set_xlabel('√âpoca', fontsize=12)
axes[1].set_ylabel('MSE Loss', fontsize=12)
axes[1].set_title('Modelo Grande (OVERFITTING)\nVal loss diverge de train loss', fontsize=13, fontweight='bold')
axes[1].legend(fontsize=11)
axes[1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print("\nüìä OBSERVACIONES:")
print(f"Modelo peque√±o: brecha final = {history_small['val_loss'][-1] - history_small['train_loss'][-1]:.4f}")
print(f"Modelo grande: brecha final = {history_large['val_loss'][-1] - history_large['train_loss'][-1]:.4f}")
print(f"\n‚û°Ô∏è El modelo grande tiene OVERFITTING (brecha > 0)")
print(f"‚û°Ô∏è El modelo peque√±o tiene UNDERFITTING (ambas p√©rdidas altas)")