<a href="https://colab.research.google.com/github/caua-sathler/NEURAL-NETWORKS/blob/main/MLP-PSO-SGD/MLP_PSO_Adam_Breast_Cancer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import copy
import random

# Prefer the GPU when one is available. The particles' models are moved to
# this device, so the data tensors below must live there as well.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 1) Load the Breast Cancer dataset
data = load_breast_cancer()
X = data.data           # shape (569, 30)
y = data.target         # 0 or 1

# 2) Split into train and test sets (70/30, stratified on the label)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

# 3) Standardize the features; the scaler is fitted on the training split only
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# 4) Convert to PyTorch tensors, created directly on `device` so that they
#    match the device of the models (a CPU tensor fed to a CUDA model raises
#    a device-mismatch error).
X_train = torch.tensor(X_train, dtype=torch.float32, device=device)
y_train = torch.tensor(y_train, dtype=torch.long, device=device)
X_test = torch.tensor(X_test, dtype=torch.float32, device=device)
y_test = torch.tensor(y_test, dtype=torch.long, device=device)

# 5) Define the MLP network
class MLP(nn.Module):
    """Fully connected classifier: input -> 32 -> 64 -> 64 -> 32 -> 16 -> 8 -> output.

    Every layer except the last is followed by a ReLU; the last layer's raw
    outputs (logits) are returned unchanged.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        # Consecutive pairs of widths define the linear layers fc1 .. fc7,
        # registered in that order.
        widths = (input_dim, 32, 64, 64, 32, 16, 8, output_dim)
        for index, (fan_in, fan_out) in enumerate(zip(widths, widths[1:]), start=1):
            setattr(self, f"fc{index}", nn.Linear(fan_in, fan_out))

    def forward(self, x):
        """Return the logits for the batch ``x``."""
        hidden_layers = (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5, self.fc6)
        activations = x
        for layer in hidden_layers:
            activations = F.relu(layer(activations))
        # Final output (logits), no activation
        return self.fc7(activations)

# 6) Particle class for PSO adapted to neural-network training
class Particle:
    """One PSO particle: a private copy of a network plus its swarm state.

    Attributes:
        model: This particle's working copy of the network.
        best_model: Snapshot used as the personal-best attractor in ``pso_sgd``.
        position: Per-parameter tensors, keyed by parameter name.
        velocity: Per-parameter PSO velocities, keyed by parameter name.
        best_score: Lowest loss recorded for this particle (starts at +inf).
        device: Device on which the particle's tensors are kept.
        optimizer: Adam optimizer bound to ``model``'s parameters.
    """

    def __init__(self, model, device):
        """Clone ``model`` for this particle and initialise its PSO state.

        Args:
            model: Template network; it is deep-copied and not modified.
            device: ``torch.device`` that the copies and state tensors are moved to.
        """
        self.model = copy.deepcopy(model).to(device)
        self.best_model = copy.deepcopy(model).to(device)

        # Search-space bounds
        low = -10.0
        high = 10.0
        velocity_scale = 0.1

        # Initialise position and velocity.
        # NOTE: the random position is not written into self.model's weights
        # here; pso_sgd overwrites each entry when it first updates it.
        self.position = {
            name: torch.rand_like(param).to(device) * (high - low) + low
            for name, param in model.named_parameters()
        }
        self.velocity = {
            name: torch.randn_like(param).to(device) * velocity_scale
            for name, param in model.named_parameters()
        }

        self.best_score = float('inf')
        self.device = device

        # Adam optimizer owned by this particle
        self.optimizer = optim.Adam(self.model.parameters(), lr=0.0005, weight_decay=1e-5)

    def pso_sgd(self, global_best_model, inertia, c1, c2, learning_rate, beta1, beta2, epsilon, m, v, t):
        """Apply one PSO move followed by a plain gradient step, in place.

        Parameters whose ``.grad`` is ``None`` are skipped entirely. For every
        other parameter the velocity is updated from the personal and global
        best snapshots, the parameter is moved by that velocity, and then
        ``learning_rate * grad`` is subtracted.

        Args:
            global_best_model: Network holding the swarm-wide best weights.
            inertia: Weight applied to the previous velocity.
            c1: Coefficient of the personal-best attraction term.
            c2: Coefficient of the global-best attraction term.
            learning_rate: Step size of the gradient term.
            beta1: Decay rate for the first-moment buffer ``m``.
            beta2: Decay rate for the second-moment buffer ``v``.
            epsilon: Unused; kept so existing callers keep working.
            m: Dict of first-moment buffers keyed by parameter name; updated in place.
            v: Dict of second-moment buffers keyed by parameter name; updated in place.
            t: Unused; kept so existing callers keep working.
        """
        # Build each state dict once instead of once per parameter; neither
        # model is modified inside this loop.
        personal_best = self.best_model.state_dict()
        swarm_best = global_best_model.state_dict()

        for name, param in self.model.named_parameters():
            if param.grad is None:
                continue

            local_rand = random.random()
            global_rand = random.random()

            # Velocity update
            self.velocity[name] = (
                inertia * self.velocity[name]
                + c1 * local_rand * (personal_best[name].to(self.device) - param.data)
                + c2 * global_rand * (swarm_best[name].to(self.device) - param.data)
            )

            # Position update (the PSO move)
            self.position[name] = param.data + self.velocity[name]

            # Moment buffers are still maintained for the caller, although the
            # parameter update below is a plain gradient step and does not
            # read them.
            m[name] = beta1 * m[name].to(param.device) + (1 - beta1) * param.grad
            v[name] = beta2 * v[name].to(param.device) + (1 - beta2) * (param.grad ** 2)

            # Simple gradient step applied on top of the PSO move
            param.data = self.position[name] - learning_rate * param.grad

    def evaluate_test(self, x_test, y_test, criterion):
        """Evaluate ``self.model`` on a held-out batch without updating it.

        Args:
            x_test: Input batch.
            y_test: Target class indices for ``x_test``.
            criterion: Loss callable taking ``(outputs, targets)``.

        Returns:
            Tuple ``(loss, accuracy)`` with the loss as a Python float and the
            accuracy as a percentage.
        """
        self.model.eval()
        with torch.no_grad():
            outputs = self.model(x_test)
            loss = criterion(outputs, y_test)
            predicted = outputs.argmax(dim=1)
            accuracy = (predicted == y_test).sum().item() / len(x_test)

        return loss.item(), accuracy * 100

    def evaluate_train(self, x_train, y_train, criterion):
        """Run one optimisation step on the batch and report its metrics.

        Despite the name, this method trains: it zeroes the gradients, runs a
        forward and backward pass, and calls ``self.optimizer.step()``. The
        returned loss and accuracy come from the forward pass made before the
        step, so they describe the weights as they were on entry.

        Args:
            x_train: Input batch.
            y_train: Target class indices for ``x_train``.
            criterion: Loss callable taking ``(outputs, targets)``.

        Returns:
            Tuple ``(loss, accuracy)`` with the loss as a Python float and the
            accuracy as a percentage.
        """
        self.model.train()
        self.optimizer.zero_grad()

        outputs = self.model(x_train)
        loss = criterion(outputs, y_train)
        loss.backward()
        self.optimizer.step()

        predicted = outputs.detach().argmax(dim=1)
        accuracy = (predicted == y_train).sum().item() / len(x_train)

        return loss.item(), accuracy * 100

# 7) PSO hyper-parameters
pop_size = 10
num_epochs = 150
inertia = 0.9
c1, c2 = 0.5, 0.9
learning_rate = 0.0001
beta1, beta2 = 0.5, 0.999
epsilon = 1e-8

# 8) Build the network for Breast Cancer
# input_dim=30 (breast cancer features) and output_dim=2 (binary classification)
model = MLP(input_dim=30, output_dim=2)
particles = [Particle(model, device) for _ in range(pop_size)]

global_best_model = copy.deepcopy(particles[0].model)
global_best_score = float('inf')

criterion = nn.CrossEntropyLoss()

# Moment buffers passed to pso_sgd; the same two dicts are shared by every particle
m = {name: torch.zeros_like(param) for name, param in model.named_parameters()}
v = {name: torch.zeros_like(param) for name, param in model.named_parameters()}

# 9) PSO training loop
for epoch in range(num_epochs):
    # Decay the inertia linearly, starting at 0.9 and heading toward 0.4
    inertia = 0.9 - ((0.9 - 0.4) / num_epochs) * epoch

    for particle in particles:
        particle.model.train()

        # Position update (PSO + SGD).
        # The gradients are deliberately NOT zeroed before this call: pso_sgd
        # skips every parameter whose .grad is None, and Optimizer.zero_grad()
        # resets gradients to None by default on recent PyTorch releases, which
        # turned the whole PSO step into a no-op. pso_sgd now sees the
        # gradients left by the previous evaluate_train call (none exist yet
        # on the first epoch, so that epoch is skipped). evaluate_train zeroes
        # the gradients itself before its own backward pass.
        particle.pso_sgd(global_best_model, inertia, c1, c2, learning_rate,
                         beta1, beta2, epsilon, m, v, epoch + 1)

        # Train on the full training set and update the particle's local best
        val_loss, val_accuracy = particle.evaluate_train(X_train, y_train, criterion)
        if val_loss < particle.best_score:
            particle.best_score = val_loss
            particle.best_model = copy.deepcopy(particle.model)

    # Find the best particle and update the global best
    best_particle = min(particles, key=lambda p: p.best_score)
    if best_particle.best_score < global_best_score:
        global_best_score = best_particle.best_score
        global_best_model = copy.deepcopy(best_particle.best_model)

    # Every 10 epochs, evaluate the best particle's current model on the test set
    if (epoch + 1) % 10 == 0:
        val_loss, val_accuracy = best_particle.evaluate_test(X_test, y_test, criterion)
        print(f'Epoch {epoch+1}/{num_epochs}, '
              f'Validation Loss: {val_loss:.2f}, '
              f'Validation Accuracy: {val_accuracy:.2f}')


Epoch 10/150, Validation Loss: 0.76, Validation Accuracy: 37.43
Epoch 20/150, Validation Loss: 0.75, Validation Accuracy: 37.43
Epoch 30/150, Validation Loss: 0.73, Validation Accuracy: 37.43
Epoch 40/150, Validation Loss: 0.69, Validation Accuracy: 37.43
Epoch 50/150, Validation Loss: 0.62, Validation Accuracy: 80.12
Epoch 60/150, Validation Loss: 0.51, Validation Accuracy: 94.74
Epoch 70/150, Validation Loss: 0.35, Validation Accuracy: 94.15
Epoch 80/150, Validation Loss: 0.24, Validation Accuracy: 94.15
Epoch 90/150, Validation Loss: 0.17, Validation Accuracy: 94.74
Epoch 100/150, Validation Loss: 0.11, Validation Accuracy: 95.91
Epoch 110/150, Validation Loss: 0.08, Validation Accuracy: 98.25
Epoch 120/150, Validation Loss: 0.07, Validation Accuracy: 98.25
Epoch 130/150, Validation Loss: 0.07, Validation Accuracy: 97.66
Epoch 140/150, Validation Loss: 0.07, Validation Accuracy: 97.66
Epoch 150/150, Validation Loss: 0.07, Validation Accuracy: 97.66
