# Redes Neuronales

In [5]:
from IPython.display import Image
import polars as pl
import os
import sys
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Default compute device for this notebook; a later cell reassigns `device`
# depending on CUDA availability.
device = 'cpu'
# NOTE(review): hard-coded absolute path, presumably the author's project
# checkout, added so the `src` package below can be imported. Adjust it when
# running on another machine.
sys.path.append('/home/sebacastillo/neuralnets/')
from src.utils import get_project_root
# Project root, used below as the prefix of the data-file paths.
root = get_project_root()

In [2]:
# Toggle between the two logic-gate datasets: OR when True, XOR otherwise.
OR_problem = False
# Both files are comma-separated and live under <root>/data.
df = np.loadtxt(
    str(root) + ('/data/OR.csv' if OR_problem else '/data/XOR.csv'),
    delimiter=",",
)

# Bibliography
- https://github.com/automl/Auto-PyTorch
- https://pytorch.org/blog/effective-multi-objective-nueral-architecture/ 
- https://ax.dev/tutorials/tune_cnn.html

# First DNN

In [None]:
# Minimal optimizer setup: a two-element parameter tensor updated with plain SGD.
# The imports live in this cell because no earlier cell imports torch, and
# `optim` was not imported anywhere in the notebook (the cell raised NameError).
import torch
from torch import optim

params = torch.tensor([1.0, 0.0], requires_grad=True)  # leaf tensor tracked by autograd
learning_rate = 1e-2
optimizer = optim.SGD([params], lr=learning_rate)





In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class MLP(nn.Module):
    """Fully connected feed-forward network with ReLU hidden activations.

    Args:
        input_size: Number of input features per sample.
        layer_config: Sizes of the hidden layers, in order. An empty
            sequence builds a single linear map from input to output.
        output_size: Number of output units. The output layer applies no
            activation, so the network returns raw scores.
    """

    def __init__(self, input_size, layer_config, output_size):
        super().__init__()

        # Consecutive entries of `widths` are the (in, out) sizes of each
        # linear layer: input -> hidden... -> output. Building the layers from
        # this list also covers the case of no hidden layers at all.
        widths = [input_size, *layer_config, output_size]
        self.layers = nn.ModuleList(
            [nn.Linear(n_in, n_out) for n_in, n_out in zip(widths, widths[1:])]
        )

    def forward(self, x):
        """Return the output-layer scores for the batch ``x``.

        Inputs with more than two dimensions whose last dimension does not
        match the first layer's ``in_features`` (e.g. image batches shaped
        ``(batch, 1, 28, 28)``) are flattened to ``(batch, features)`` first.
        Inputs that the first layer can already consume are left untouched.
        """
        if x.dim() > 2 and x.size(-1) != self.layers[0].in_features:
            x = x.flatten(start_dim=1)

        # ReLU after every layer except the last one
        for layer in self.layers[:-1]:
            x = F.relu(layer(x))
        # No activation on the output layer
        return self.layers[-1](x)

# Usage example: build an MLP sized for MNIST digit classification.

# Input and output dimensions
input_size = 784  # e.g. MNIST images (28x28)
output_size = 10  # e.g. classifying the digits 0 through 9

# Hidden-layer widths given as a list:
# [64, 32] creates 2 hidden layers with 64 and 32 neurons respectively
layer_config = [64, 32]

# Instantiate the multilayer perceptron and show its structure
model = MLP(input_size, layer_config, output_size)
print(model)


MLP(
  (layers): ModuleList(
    (0): Linear(in_features=784, out_features=64, bias=True)
    (1): Linear(in_features=64, out_features=32, bias=True)
    (2): Linear(in_features=32, out_features=10, bias=True)
  )
)


# RandomSearch

In [10]:
import random

def random_search_mlp(train_loader, test_loader, input_size, output_size, n_iterations, device):
    """Random-search MLP hyperparameters and return the most accurate model.

    Each trial samples a hidden-layer layout (1-3 layers of 10-16 units), a
    log-uniform learning rate in [1e-4, 1e-1], a batch size and an epoch
    count, trains a fresh ``MLP`` with Adam and cross-entropy loss, and scores
    it by classification accuracy on ``test_loader``.

    Args:
        train_loader: Iterable of ``(data, target)`` training batches. When it
            is a ``torch.utils.data.DataLoader`` over a map-style dataset, the
            dataset is re-batched for every trial with the sampled batch size
            (shuffled; worker count, collate function, pin-memory and
            drop-last settings are carried over, a custom sampler is not).
            Any other iterable is used as given, so the sampled batch size
            then has no effect.
        test_loader: Iterable of ``(data, target)`` evaluation batches.
        input_size: Number of input features of the model.
        output_size: Number of classes.
        n_iterations: Number of random trials.
        device: Device the model and the batches are moved to.

    Returns:
        ``(best_model, best_accuracy)``. ``best_model`` is ``None`` only when
        no trial was run.
    """
    best_model = None
    best_accuracy = 0

    # The sampled batch size can only be applied when the dataset behind the
    # loader is reachable and supports shuffling.
    rebatchable = isinstance(train_loader, torch.utils.data.DataLoader) and not isinstance(
        train_loader.dataset, torch.utils.data.IterableDataset
    )

    for i in range(n_iterations):
        # Generate random hyperparameters
        layer_config = [random.choice([10, 12, 14, 16]) for _ in range(random.choice([1, 2, 3]))]
        learning_rate = 10**random.uniform(-4, -1)
        batch_size = random.choice([32, 64, 128, 160])
        num_epochs = random.choice([10, 20, 30, 50])

        print(f"Trial {i + 1}/{n_iterations}:")
        print(f"  Layer config: {layer_config}")
        print(f"  Learning rate: {learning_rate}")
        print(f"  Batch size: {batch_size}")
        print(f"  Num epochs: {num_epochs}")

        # Previously the sampled batch size was reported but never used.
        if rebatchable:
            trial_loader = torch.utils.data.DataLoader(
                train_loader.dataset,
                batch_size=batch_size,
                shuffle=True,
                num_workers=train_loader.num_workers,
                collate_fn=train_loader.collate_fn,
                pin_memory=train_loader.pin_memory,
                drop_last=train_loader.drop_last,
            )
        else:
            trial_loader = train_loader

        # Create the model with the current hyperparameters
        model = MLP(input_size, layer_config, output_size).to(device)

        # Set the loss function and optimizer
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

        # Train the model
        model.train()
        for _ in range(num_epochs):
            for data, target in trial_loader:
                data, target = data.to(device), target.to(device)
                optimizer.zero_grad()
                loss = criterion(model(data), target)
                loss.backward()
                optimizer.step()

        # Test the model
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for data, target in test_loader:
                data, target = data.to(device), target.to(device)
                predicted = model(data).argmax(dim=1)
                total += target.size(0)
                correct += (predicted == target).sum().item()

        # An empty test loader scores 0 instead of raising ZeroDivisionError
        accuracy = correct / total if total else 0.0
        print(f"  Accuracy: {accuracy * 100:.2f}%")

        # Keep the first trial unconditionally so a model is always returned,
        # even when every trial scores 0.
        if best_model is None or accuracy > best_accuracy:
            best_accuracy = accuracy
            best_model = model

    return best_model, best_accuracy


# Bayesian Optimization

In [None]:
pip install bayesian-optimization


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# NOTE: this cell relies on the MLP class defined in the "First DNN" cell above; run that cell first.

def train_evaluate_mlp(layer_config, learning_rate, batch_size, num_epochs, train_loader, test_loader, device,
                       input_size=784, output_size=10):
    """Train an ``MLP`` with the given hyperparameters and return its test accuracy.

    Args:
        layer_config: Hidden-layer sizes; each entry is truncated to ``int``.
        learning_rate: Learning rate passed to Adam.
        batch_size: Training batch size. It is applied by re-batching the
            dataset behind ``train_loader`` when that is a
            ``torch.utils.data.DataLoader`` over a map-style dataset
            (shuffled; worker count, collate function, pin-memory and
            drop-last settings are carried over, a custom sampler is not).
            For any other iterable the loader is used as given.
        num_epochs: Number of passes over the training data (truncated to ``int``).
        train_loader: Iterable of ``(data, target)`` training batches.
        test_loader: Iterable of ``(data, target)`` evaluation batches.
        device: Device the model and the batches are moved to.
        input_size: Number of input features; defaults to 784 (28x28 images).
        output_size: Number of classes; defaults to 10.

    Returns:
        Fraction of correctly classified test samples, or ``0.0`` when the
        test loader yields no samples.
    """
    layer_config = [int(x) for x in layer_config]

    # Previously `batch_size` was accepted but never used, so tuning it had
    # no effect on training.
    if isinstance(train_loader, torch.utils.data.DataLoader) and not isinstance(
        train_loader.dataset, torch.utils.data.IterableDataset
    ):
        train_loader = torch.utils.data.DataLoader(
            train_loader.dataset,
            batch_size=int(batch_size),
            shuffle=True,
            num_workers=train_loader.num_workers,
            collate_fn=train_loader.collate_fn,
            pin_memory=train_loader.pin_memory,
            drop_last=train_loader.drop_last,
        )

    model = MLP(input_size, layer_config, output_size).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Training the model
    model.train()
    for _ in range(int(num_epochs)):
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            loss = criterion(model(data), target)
            loss.backward()
            optimizer.step()

    # Evaluating the model
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            predicted = model(data).argmax(dim=1)
            total += target.size(0)
            correct += (predicted == target).sum().item()

    # Guard against an empty test loader instead of raising ZeroDivisionError
    return correct / total if total else 0.0


In [None]:
from bayes_opt import BayesianOptimization
import numpy as np

# Load your train and test datasets here.
# NOTE: the `...` values below are Ellipsis placeholders, not data loaders;
# replace them with real loaders before running the search below.
train_loader = ...
test_loader = ...

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def optimize_mlp(n_iter, init_points, train_loader, test_loader, device):
    """Tune MLP hyperparameters with Bayesian optimization.

    The search maximizes the accuracy returned by ``train_evaluate_mlp`` over
    three hidden-layer sizes, the base-10 exponent of the learning rate, the
    batch size and the number of epochs.

    Args:
        n_iter: Number of Bayesian-optimization steps.
        init_points: Number of initial random evaluations.
        train_loader: Training batches forwarded to ``train_evaluate_mlp``.
        test_loader: Evaluation batches forwarded to ``train_evaluate_mlp``.
        device: Device forwarded to ``train_evaluate_mlp``.

    Returns:
        The optimizer's ``max`` record (best target value and the parameters
        that produced it). It is also printed, as before; earlier versions
        returned nothing, so the result could not be used programmatically.
    """
    def objective_function(layer1, layer2, layer3, learning_rate, batch_size, num_epochs):
        # The optimizer proposes floats: sizes below 1 mean "no such layer"
        # and are dropped, the rest are truncated to integers.
        hidden_sizes = [int(width) for width in (layer1, layer2, layer3) if width >= 1]
        # `learning_rate` is searched as an exponent, hence 10**learning_rate
        return train_evaluate_mlp(hidden_sizes, 10**learning_rate, int(batch_size), int(num_epochs),
                                  train_loader, test_loader, device)

    # Search bounds; layer2/layer3 start at 0 so they can be switched off
    pbounds = {'layer1': (16, 512),
               'layer2': (0, 512),
               'layer3': (0, 512),
               'learning_rate': (-4, -1),
               'batch_size': (16, 256),
               'num_epochs': (10, 100)}

    search = BayesianOptimization(
        f=objective_function,
        pbounds=pbounds,
        random_state=42,
    )

    search.maximize(init_points=init_points, n_iter=n_iter)

    best = search.max
    print("Best hyperparameters found:")
    print(best)
    return best

# NOTE(review): in this cell `train_loader` and `test_loader` are still the
# `...` placeholders assigned above; assign real loaders before running this call.
optimize_mlp(n_iter=10, init_points=2, train_loader=train_loader, test_loader=test_loader, device=device)


In [13]:
import torch
from torchvision import datasets, transforms

def get_mnist_data_loaders(batch_size):
    """Build the MNIST train and test data loaders.

    Both splits are read from ``./data`` (the training split is downloaded if
    missing), converted to tensors and normalized with mean 0.1307 and
    standard deviation 0.3081.

    Args:
        batch_size: Batch size used by both loaders.

    Returns:
        ``(train_loader, test_loader)``; only the training loader shuffles.
    """
    data_dir = './data'
    # presumably the MNIST pixel mean/std - TODO confirm
    normalize = transforms.Normalize((0.1307,), (0.3081,))
    pipeline = transforms.Compose([transforms.ToTensor(), normalize])

    mnist_train = datasets.MNIST(data_dir, train=True, download=True, transform=pipeline)
    mnist_test = datasets.MNIST(data_dir, train=False, transform=pipeline)

    make_loader = torch.utils.data.DataLoader
    return (
        make_loader(mnist_train, batch_size=batch_size, shuffle=True),
        make_loader(mnist_test, batch_size=batch_size, shuffle=False),
    )


In [None]:
# Build the MNIST loaders with an initial batch size of 100.
train_loader, test_loader = get_mnist_data_loaders(batch_size=100)

# Run ten random-search trials and keep the best model with its accuracy.
best_model_random, best_accuracy_random = random_search_mlp(
    train_loader,
    test_loader,
    input_size=784,
    output_size=10,
    n_iterations=10,
    device=device,
)

print(f"Best model found with Random Search: {best_model_random}")
print(f"Best accuracy with Random Search: {best_accuracy_random * 100:.2f}%")


In [None]:
# Run the Bayesian hyperparameter search: 2 random warm-up points, then 10 guided steps.
optimize_mlp(
    n_iter=10,
    init_points=2,
    train_loader=train_loader,
    test_loader=test_loader,
    device=device,
)


In [None]:
import matplotlib.pyplot as plt

# Re-score the model kept by the random search on the MNIST test split and
# show its accuracy as a bar chart. Requires `best_model_random` and `device`
# from the earlier cells.
batch_size = 100
_, test_loader = get_mnist_data_loaders(batch_size)

correct = 0
total = 0
# Gradients are not needed for evaluation
with torch.no_grad():
    for data, target in test_loader:
        data = data.to(device)
        target = target.to(device)
        output = best_model_random(data)
        # Index of the highest score along the class dimension
        predicted = torch.max(output.data, 1)[1]
        total += target.size(0)
        correct += (predicted == target).sum().item()

accuracy = correct / total

# Single-bar chart on a fixed 0-100 % axis
fig, ax = plt.subplots()
ax.bar(['Random Search'], [accuracy * 100])
ax.set_ylim([0, 100])
ax.set_ylabel('Accuracy (%)')
plt.show()
