In [1]:
import torch
import torch.nn as nn
import numpy as np
import itertools

# Definimos posibles valores para el grid search
hidden_sizes = [32, 50, 64, 128]        # Tamaños de capas ocultas
num_layers_list = [3, 4, 5, 6]          # Número de capas ocultas
activation_functions = [nn.ReLU, nn.GELU, nn.Tanh]  # Funciones de activación

# Creamos una clase flexible de PINN con configuraciones variables
class FlexibleNeuralNet(nn.Module):
    def __init__(self, hidden_size, num_layers, activation_fn, output_size=1, input_size=1):
        super(FlexibleNeuralNet, self).__init__()
        layers = []
        
        # Capa de entrada
        layers.append(nn.Linear(input_size, hidden_size))
        layers.append(activation_fn())
        
        # Capas ocultas
        for _ in range(num_layers - 1):
            layers.append(nn.Linear(hidden_size, hidden_size))
            layers.append(activation_fn())
        
        # Capa de salida
        layers.append(nn.Linear(hidden_size, output_size))
        
        # Conectar las capas
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        return self.model(x)

# Definimos la función de costo (MSE)
criterion = nn.MSELoss()

# Vector de tiempo como entrada de la NN
t_numpy = np.arange(-5, 5 + 0.01, 0.001, dtype=np.float32)
t = torch.from_numpy(t_numpy).reshape(len(t_numpy), 1)
t.requires_grad_(True)

# Parámetros físicos
g = 9.8
L = 10

# Inicializar los resultados del grid search
results = []

# Grid search
for hidden_size, num_layers, activation_fn in itertools.product(hidden_sizes, num_layers_list, activation_functions):
    # Inicializar modelo y optimizador
    model = FlexibleNeuralNet(hidden_size=hidden_size, num_layers=num_layers, activation_fn=activation_fn)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    
    # Entrenar por unas pocas épocas para evaluación rápida
    num_epochs = 500
    for epoch in range(num_epochs):
        epsilon = torch.normal(0, 0.1, size=(len(t), 1)).float()
        t_train = t + epsilon

        # Forward pass
        y_pred = model(t_train)
        
        # Calcular derivadas
        dy_dt = torch.autograd.grad(y_pred, t_train, grad_outputs=torch.ones_like(y_pred), create_graph=True)[0]
        d2y_dt2 = torch.autograd.grad(dy_dt, t_train, grad_outputs=torch.ones_like(dy_dt), create_graph=True)[0]

        # Pérdida de la ecuación diferencial
        loss_DE = criterion(d2y_dt2 + (g/L)*y_pred, torch.zeros_like(d2y_dt2))
        
        # Pérdida de la condición inicial
        loss_IC = criterion(model(torch.tensor([[0.0]])), torch.tensor([[2.0]]))
        
        # Pérdida total
        loss = loss_DE + loss_IC

        # Optimización
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    # Guardamos los resultados del grid search
    results.append({
        'hidden_size': hidden_size,
        'num_layers': num_layers,
        'activation_fn': activation_fn.__name__,
        'final_loss': loss.item()
    })
    print(f'Config: Hidden Size={hidden_size}, Layers={num_layers}, Activation={activation_fn.__name__} - Loss: {loss.item()}')

# Mostramos la mejor configuración
best_config = min(results, key=lambda x: x['final_loss'])
print(f"\nMejor configuración: Hidden Size={best_config['hidden_size']}, Layers={best_config['num_layers']}, "
      f"Activation={best_config['activation_fn']} - Loss: {best_config['final_loss']}")


Config: Hidden Size=32, Layers=3, Activation=ReLU - Loss: 0.0502607524394989
Config: Hidden Size=32, Layers=3, Activation=GELU - Loss: 0.018592871725559235
Config: Hidden Size=32, Layers=3, Activation=Tanh - Loss: 0.26835495233535767
Config: Hidden Size=32, Layers=4, Activation=ReLU - Loss: 0.04083538427948952
Config: Hidden Size=32, Layers=4, Activation=GELU - Loss: 0.00011622115562204272
Config: Hidden Size=32, Layers=4, Activation=Tanh - Loss: 0.009346365928649902
Config: Hidden Size=32, Layers=5, Activation=ReLU - Loss: 0.016851380467414856
Config: Hidden Size=32, Layers=5, Activation=GELU - Loss: 8.861877722665668e-05
Config: Hidden Size=32, Layers=5, Activation=Tanh - Loss: 0.007047646678984165
Config: Hidden Size=32, Layers=6, Activation=ReLU - Loss: 0.02289504185318947
Config: Hidden Size=32, Layers=6, Activation=GELU - Loss: 0.0002243544877273962
Config: Hidden Size=32, Layers=6, Activation=Tanh - Loss: 0.01073321420699358
Config: Hidden Size=50, Layers=3, Activation=ReLU - Lo

In [4]:
import os

# Creamos una carpeta para guardar las gráficas, si no existe
output_folder = "heatmaps"
os.makedirs(output_folder, exist_ok=True)

#Creamos un heatmap de las pérdidas promedio en escala logarítmica y guardarlo
for activation_fn in df_results['activation_fn'].unique():
    subset = df_results[df_results['activation_fn'] == activation_fn]
    
    # Convertimos la pérdida a logaritmo para mejorar la visualización
    subset['log_final_loss'] = np.log10(subset['final_loss'] + 1e-10)  # Evitar log(0) sumando un pequeño valor

    heatmap_data = subset.pivot(index="hidden_size", columns="num_layers", values="log_final_loss")

    plt.figure(figsize=(8, 6))
    sns.heatmap(heatmap_data, annot=True, fmt=".3f", cmap="BuGn_r")
    plt.title(f'Heatmap de pérdida final en escala logarítmica - Activación: {activation_fn}')
    plt.xlabel('Número de capas ocultas')
    plt.ylabel('Tamaño de capa oculta')

    # Guardar cada gráfico con el nombre de la función de activación
    filename = f"{output_folder}/heatmap_loss_log_{activation_fn}.png"
    plt.savefig(filename)
    plt.close()  # Cerrar la figura para liberar memoria


NameError: name 'df_results' is not defined