> NOTA: esta notebook importa funciones de notebook.ipynb.
> Posiblemente no funcione en Google Colab sin configuración adicional.

# 🔎 Grid Search

Se implementa una búsqueda exhaustiva (grid search) de combinaciones de hiperparámetros para encontrar la mejor configuración del modelo y analizar el impacto de los hiperparámetros en el entrenamiento. Se exploran diferentes:

- Arquitecturas de red (capas ocultas),
- Tamaños de batch para SGD, y
- Tasas de decrecimiento del learning rate.

Los resultados se guardan en un CSV para análisis posterior y se generan visualizaciones de cada experimento.

Primero se deben importar las funciones y variables de `notebook.ipynb`.

In [None]:

def _validation_accuracy(mlp, val_data, n_features=100):
    """Return the fraction of rows in *val_data* that *mlp* classifies correctly.

    The first ``n_features`` columns of ``val_data`` are passed to
    ``mlp.classify`` as a float64 vector and the prediction is compared with
    the value stored in column ``n_features``.

    Returns ``nan`` when ``val_data`` has no rows, instead of dividing by zero.
    """
    n_samples = len(val_data)
    if n_samples == 0:
        return float("nan")

    # Convert once up front: per-row ``.iloc`` lookups inside the loop are
    # far slower than iterating over plain NumPy rows.
    features = val_data.iloc[:, :n_features].to_numpy(dtype=np.float64)
    labels = val_data.iloc[:, n_features].to_numpy()

    correct = sum(
        1
        for sample, true_class in zip(features, labels)
        if mlp.classify(sample) == true_class
    )
    return correct / n_samples


def _plot_experiment_losses(experiment_name, results, n_cols=3, min_rows=3):
    """Show one figure with the train/validation loss curves of every result.

    One subplot is drawn per entry of ``results``. The grid has ``n_cols``
    columns and at least ``min_rows`` rows (3x3 by default); extra rows are
    added when there are more results than cells so no subplot index can fall
    outside the grid.
    """
    # Ceiling division without importing math.
    n_rows = max(min_rows, -(-len(results) // n_cols))
    fig, axes = plt.subplots(
        n_rows,
        n_cols,
        # 15x10 for the default three rows; height scales with extra rows.
        figsize=(15, 10 * n_rows / 3),
        squeeze=False,  # always a 2-D array, whatever the grid shape
    )
    fig.suptitle(f"Experiment: {experiment_name}", fontsize=16, fontweight="bold")

    # ``axes.flat`` walks the grid row by row, i.e. cell (idx // n_cols, idx % n_cols).
    for idx, (ax, res) in enumerate(zip(axes.flat, results)):
        ax.plot(
            res["epochs"],
            res["train_losses"],
            color="orangered",
            label="Training Loss",
        )
        ax.plot(
            res["epochs"],
            res["val_losses"],
            color="seagreen",
            linestyle="--",
            label="Validation Loss",
        )

        ax.set_yscale("log")
        ax.set_title(f"Dataset '{res['dataset_name']}'", fontsize=10)
        ax.grid(True, alpha=0.3)
        ax.set_xlabel("Epochs")

        # Loss label only on the first column of the grid
        if idx % n_cols == 0:
            ax.set_ylabel("Loss")

        # Legend only on the first subplot to avoid clutter
        if idx == 0:
            ax.legend(loc="upper right", fontsize=8)

    plt.tight_layout()
    plt.show()
    # Close this specific figure so figures do not accumulate over the search.
    plt.close(fig)


def grid_search_minibatch(datasets):
    """
    Run a grid search over hyperparameters using train_with_minibatch_sgd.

    Explores every combination of:
    - Hidden layer 1 sizes: [50, 40, 30, 20, 10]
    - Hidden layer 2 sizes: [25, 20, 15, 10, 5]
    - Decay rates: [0.01, 0.02, 0.05]
    - Batch sizes: [16, 8]

    For each combination the RNG is reset, a sigmoid MLP2 with layers
    (100, h1, h2, 3) is trained on every dataset, the validation accuracy and
    validation-loss statistics are recorded, and the loss curves are plotted.

    Args:
        datasets: Mapping from dataset name to a 2-item sequence whose second
            item is the validation DataFrame (columns 0-99 are features,
            column 100 is the class). The first item is not used here
            (presumably the training data -- confirm against the caller).
            Passed unchanged to train_with_minibatch_sgd.

    Returns:
        DataFrame with one row per experiment. Columns are ``experiment``,
        ``<dataset>_accuracy``, ``<dataset>_val_loss_mean`` and
        ``<dataset>_val_loss_final`` for each dataset, and
        ``best_dataset_by_min_val_loss``. The same table is written to
        'grid_search_results.csv'.
    """
    from itertools import product

    # Define hyperparameter grid
    h1_sizes = [50, 40, 30, 20, 10]
    h2_sizes = [25, 20, 15, 10, 5]
    decay_rates = [0.01, 0.02, 0.05]
    batch_sizes = [16, 8]

    # Fixed hyperparameters
    initial_lr = 0.1
    min_lr = 0.05
    tolerance = 1e-6

    # ``product`` varies the rightmost factor fastest, which matches nested
    # loops over h1 -> h2 -> decay -> batch_size.
    grid = list(product(h1_sizes, h2_sizes, decay_rates, batch_sizes))
    total_experiments = len(grid)

    # Results storage
    all_results = []

    print(f"Starting grid search: {total_experiments} total experiments")
    print("=" * 80)

    for experiment_num, (h1, h2, decay, batch_size) in enumerate(grid, start=1):
        # ``round`` rather than ``int``: float error (e.g. 0.29 * 100 ==
        # 28.999...) would otherwise truncate to the wrong percentage.
        decay_pct = round(decay * 100)
        experiment_name = f"lr{initial_lr:.2f}_decay{decay_pct}pct_bs{batch_size}_layers100-{h1}-{h2}-3"

        print(f"\n[{experiment_num}/{total_experiments}] Starting: {experiment_name}")

        # Reset RNG for reproducibility
        reset_rng()

        # Bind h1/h2 as defaults so the factory keeps this iteration's sizes
        # even if it were called after the loop variables change.
        def mlp_factory(h1=h1, h2=h2):
            return MLP2(
                layers=(100, h1, h2, 3),
                activation_type="sigmoid",
                learning_rate=initial_lr,
                momentum=0.1,
            )

        # Train with minibatch SGD
        results = train_with_minibatch_sgd(
            datasets=datasets,
            mlp_factory=mlp_factory,
            batch_size=batch_size,
            shuffle=True,
            initial_lr=initial_lr,
            decay=decay,
            min_lr=min_lr,
            tolerance=tolerance,
        )

        # Build result row
        row = {"experiment": experiment_name}

        # Mean validation loss per dataset, used to pick the best dataset
        dataset_metrics = {}

        for res in results:
            dataset_name = res["dataset_name"]
            val_losses = res["val_losses"]

            _, val_data = datasets[dataset_name]
            accuracy = _validation_accuracy(res["mlp"], val_data)

            val_loss_mean = np.mean(val_losses)
            val_loss_final = val_losses[-1]

            row[f"{dataset_name}_accuracy"] = accuracy
            row[f"{dataset_name}_val_loss_mean"] = val_loss_mean
            row[f"{dataset_name}_val_loss_final"] = val_loss_final

            dataset_metrics[dataset_name] = val_loss_mean

        # Find best dataset (minimum mean validation loss)
        best_dataset = min(dataset_metrics, key=dataset_metrics.get)
        row["best_dataset_by_min_val_loss"] = best_dataset

        all_results.append(row)

        # Plot training evolution for this experiment
        _plot_experiment_losses(experiment_name, results)

        print(f"Experiment {experiment_name} finished")
        print(
            f"  - Best dataset: {best_dataset} (val_loss_mean={dataset_metrics[best_dataset]:.6f})"
        )

    # Create DataFrame and save to CSV
    df_results = pd.DataFrame(all_results)
    csv_filename = "grid_search_results.csv"
    df_results.to_csv(csv_filename, index=False)

    print("\n" + "=" * 80)
    print("Grid search completed!")
    print(f"Results saved to: {csv_filename}")
    print(f"Total experiments: {len(all_results)}")

    return df_results


# Run the full grid search. `datasets` is not defined in this section and must
# already be in scope; the recorded output below is the NameError raised when
# it was not.
grid_search_minibatch(datasets)

NameError: name 'datasets' is not defined