<a href="https://colab.research.google.com/github/Zewtta/PEL219_2025_RedesNeuraisArtificiais/blob/main/PEL219_MLP_Backpropagation_Matheus_Vieira.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Tarefa 2 - MLP Backpropagation

##Aluno: Matheus Vieira

###Loading dos dados

In [6]:
import pandas as pd

url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"

colunas = ["sepallength", "sepalwidth", "petallength", "petalwidth", "class"]
df = pd.read_csv(url, names=colunas)

print(df.head())


   sepallength  sepalwidth  petallength  petalwidth        class
0          5.1         3.5          1.4         0.2  Iris-setosa
1          4.9         3.0          1.4         0.2  Iris-setosa
2          4.7         3.2          1.3         0.2  Iris-setosa
3          4.6         3.1          1.5         0.2  Iris-setosa
4          5.0         3.6          1.4         0.2  Iris-setosa


In [5]:
import numpy as np
import pandas as pd
from urllib.request import urlopen

def load_iris():

    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
    data = urlopen(url).read().decode("utf-8").strip().splitlines()

    X, y = [], []
    label_map = {"Iris-setosa": 0, "Iris-versicolor": 1, "Iris-virginica": 2}
    for line in data:
        if not line:
            continue
        parts = line.split(",")
        if len(parts) != 5:
            continue
        feats = list(map(float, parts[:4]))
        label = label_map[parts[4]]
        X.append(feats)
        y.append(label)

    return np.array(X, dtype=np.float64), np.array(y, dtype=np.int64)


X, y = load_iris()

print(X.shape)
print(y.shape)

#X são os dados de input
#y são os resultados de classificação, onde 0 é a iris setosa, 1 é a iris versicolor e 2 a iris virgginica





(150, 4)
(150,)


### MLP backpropagation

In [7]:
import numpy as np
from urllib.request import urlopen


def load_iris():

    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
    data = urlopen(url).read().decode("utf-8").strip().splitlines()

    X, y = [], []
    label_map = {"Iris-setosa": 0, "Iris-versicolor": 1, "Iris-virginica": 2}
    for line in data:
        if not line:
            continue
        parts = line.split(",")
        if len(parts) != 5:
            continue
        feats = list(map(float, parts[:4]))
        label = label_map[parts[4]]
        X.append(feats)
        y.append(label)

    return np.array(X, dtype=np.float64), np.array(y, dtype=np.int64)


def train_test_split_np(X, y, test_size=0.3, seed=42):

    rng = np.random.default_rng(seed)
    idx = np.arange(len(X))
    rng.shuffle(idx)
    X = X[idx]
    y = y[idx]
    n_test = int(round(test_size * len(X)))
    X_test = X[:n_test]
    y_test = y[:n_test]
    X_train = X[n_test:]
    y_train = y[n_test:]
    return X_train, X_test, y_train, y_test


def one_hot(y, num_classes):

    oh = np.zeros((y.size, num_classes))
    oh[np.arange(y.size), y] = 1.0
    return oh


def confusion_matrix(y_true, y_pred, num_classes):
    cm = np.zeros((num_classes, num_classes), dtype=int)
    for t, p in zip(y_true, y_pred):
        cm[t, p] += 1
    return cm


# MLP (4-9-3) com Sigmoide + MSE
class MLP:
    """
    MLP 4-9-3 (regra 2n+1 -> 2*4+1=9)
    - 1 camada escondida
    - Sigmoide na oculta
    - Sigmoide na saída
    - Erro quadrático médio (MSE)
    - Treino com gradiente descendente (batch)
    """

    def __init__(self, n_inputs=4, n_hidden=9, n_outputs=3,
                 learning_rate=0.05, seed=0):
        self.n_inputs = n_inputs
        self.n_hidden = n_hidden
        self.n_outputs = n_outputs
        self.lr = learning_rate
        self.rng = np.random.default_rng(seed)
        self._init_params()

    def _init_params(self):
        """
        Inicializa pesos com valores pequenos e aleatórios |w| <= 0.1
        """
        lim = 0.1
        self.W1 = self.rng.uniform(-lim, lim, size=(self.n_inputs, self.n_hidden))
        self.b1 = np.zeros((1, self.n_hidden))

        self.W2 = self.rng.uniform(-lim, lim, size=(self.n_hidden, self.n_outputs))
        self.b2 = np.zeros((1, self.n_outputs))

    # ----------------- ativações -----------------
    @staticmethod
    def _sigmoid(z):
        return 1.0 / (1.0 + np.exp(-z))

    @staticmethod
    def _dsigmoid(a):
        # derivada em função da saída s = f(net): f'(net) = s * (1 - s)
        return a * (1.0 - a)

    # ----------------- forward -----------------
    def forward(self, X):
        """
        Fase feed-forward:
        X -> camada escondida -> camada de saída
        """
        Z1 = X @ self.W1 + self.b1
        A1 = self._sigmoid(Z1)

        Z2 = A1 @ self.W2 + self.b2
        A2 = self._sigmoid(Z2)  # sigmoide na saída

        cache = {"X": X, "Z1": Z1, "A1": A1, "Z2": Z2, "A2": A2}
        return A2, cache

    # ----------------- loss -----------------
    @staticmethod
    def _mse(pred, y_onehot):
        """
        Erro quadrático médio:
        E = (1/2m) * sum_{p,j} (t_j^p - s_j^p)^2
        """
        return 0.5 * np.mean((pred - y_onehot) ** 2)

    # ----------------- backward -----------------
    def backward(self, cache, y_onehot):
        """
        Fase feed-backward (backprop):
        calcula gradientes dW1, db1, dW2, db2.
        """
        X = cache["X"]
        A1 = cache["A1"]
        A2 = cache["A2"]
        m = X.shape[0]

        # Camada de saída: sigmoide + MSE
        # erro = A2 - t
        dA2 = (A2 - y_onehot)                   # (m, n_outputs)
        dZ2 = dA2 * self._dsigmoid(A2)          # (m, n_outputs)
        dW2 = (A1.T @ dZ2) / m                  # (n_hidden, n_outputs)
        db2 = np.sum(dZ2, axis=0, keepdims=True) / m

        # Camada escondida
        dA1 = dZ2 @ self.W2.T                  # (m, n_hidden)
        dZ1 = dA1 * self._dsigmoid(A1)         # (m, n_hidden)
        dW1 = (X.T @ dZ1) / m                  # (n_inputs, n_hidden)
        db1 = np.sum(dZ1, axis=0, keepdims=True) / m

        grads = {"dW1": dW1, "db1": db1,
                 "dW2": dW2, "db2": db2}
        return grads

    # ----------------- update -----------------
    def _update_params(self, grads):
        """
        Atualiza pesos na direção oposta ao gradiente
        """
        self.W1 -= self.lr * grads["dW1"]
        self.b1 -= self.lr * grads["db1"]
        self.W2 -= self.lr * grads["dW2"]
        self.b2 -= self.lr * grads["db2"]

    # ----------------- fit -----------------
    def fit(self, X, y, X_val=None, y_val=None, epochs=1000, verbose=True):
        """
        Treino da MLP com gradiente descendente "batch":
        em cada época, usa todos os padrões de treinamento.
        """
        num_classes = self.n_outputs
        y_oh = one_hot(y, num_classes)

        for ep in range(1, epochs + 1):
            # Forward
            y_pred, cache = self.forward(X)
            loss = self._mse(y_pred, y_oh)

            # Backward
            grads = self.backward(cache, y_oh)

            # Atualiza pesos
            self._update_params(grads)

            if verbose and (ep == 1 or ep % 100 == 0 or ep == epochs):
                if X_val is not None and y_val is not None:
                    y_val_pred, _ = self.forward(X_val)
                    y_val_oh = one_hot(y_val, num_classes)
                    val_loss = self._mse(y_val_pred, y_val_oh)
                    y_val_pred_cls = np.argmax(y_val_pred, axis=1)
                    val_acc = np.mean(y_val_pred_cls == y_val)
                    print(f"Epoch {ep:4d} | loss={loss:.4f} | val_loss={val_loss:.4f} | val_acc={val_acc:.3f}")
                else:
                    print(f"Epoch {ep:4d} | loss={loss:.4f}")

    # ----------------- predict -----------------
    def predict(self, X):
        """
        Pega a classe como o neurônio de saída com maior ativação (argmax)
        """
        probs, _ = self.forward(X)
        return np.argmax(probs, axis=1)

    # ----------------- evaluate -----------------
    def evaluate(self, X, y):
        """
        Calcula acurácia e matriz de confusão.
        """
        y_pred = self.predict(X)
        acc = np.mean(y_pred == y)
        cm = confusion_matrix(y, y_pred, self.n_outputs)
        return acc, cm



if __name__ == "__main__":
    # 1. Carrega dados
    X, y = load_iris()

    # 2. Split 70/30 (hold-out) sem sklearn
    X_train, X_test, y_train, y_test = train_test_split_np(X, y, test_size=0.3, seed=42)

    # 3. Cria MLP 4-9-3 (regra 2n+1 -> 2*4+1=9)
    mlp = MLP(n_inputs=4, n_hidden=9, n_outputs=3,
              learning_rate=0.05, seed=0)

    # 4. Treina
    mlp.fit(X_train, y_train, X_val=X_test, y_val=y_test,
            epochs=1500, verbose=True)

    # 5. Avalia
    acc, cm = mlp.evaluate(X_test, y_test)
    print("\nAcurácia de teste:", round(acc, 4))
    print("Matriz de confusão (linhas=real, cols=predito):\n", cm)


Epoch    1 | loss=0.1228 | val_loss=0.1229 | val_acc=0.311
Epoch  100 | loss=0.1116 | val_loss=0.1120 | val_acc=0.311
Epoch  200 | loss=0.1100 | val_loss=0.1104 | val_acc=0.622
Epoch  300 | loss=0.1091 | val_loss=0.1095 | val_acc=0.622
Epoch  400 | loss=0.1080 | val_loss=0.1083 | val_acc=0.622
Epoch  500 | loss=0.1063 | val_loss=0.1066 | val_acc=0.622
Epoch  600 | loss=0.1038 | val_loss=0.1042 | val_acc=0.622
Epoch  700 | loss=0.1005 | val_loss=0.1009 | val_acc=0.644
Epoch  800 | loss=0.0962 | val_loss=0.0967 | val_acc=0.978
Epoch  900 | loss=0.0911 | val_loss=0.0916 | val_acc=0.800
Epoch 1000 | loss=0.0856 | val_loss=0.0861 | val_acc=0.733
Epoch 1100 | loss=0.0802 | val_loss=0.0808 | val_acc=0.711
Epoch 1200 | loss=0.0755 | val_loss=0.0761 | val_acc=0.711
Epoch 1300 | loss=0.0715 | val_loss=0.0722 | val_acc=0.711
Epoch 1400 | loss=0.0683 | val_loss=0.0691 | val_acc=0.711
Epoch 1500 | loss=0.0658 | val_loss=0.0667 | val_acc=0.711

Acurácia de teste: 0.7111
Matriz de confusão (linhas=re