In [None]:
# === Import necessary libraries ===
import torch
import matplotlib.pyplot as plt

# === Layer Class ===
class Layer:
    """One bias-free dense layer computing ``activation(inputs @ weights)``.

    The layer stores only its width and the name of its activation; the
    weight matrix is owned by the caller and handed to ``forward`` on every
    call.

    Args:
        n_neurons: Number of output units. Stored for the caller's benefit;
            ``forward`` itself does not read it.
        activation: ``None`` for a linear layer, or one of ``"relu"``,
            ``"sigmoid"``, ``"tanh"``. Names are matched case-insensitively,
            so the historical spelling ``"Relu"`` keeps working.
    """

    # Lower-case activation name -> element-wise torch function.
    _ACTIVATIONS = {
        "relu": torch.relu,
        "sigmoid": torch.sigmoid,
        "tanh": torch.tanh,
    }

    def __init__(self, n_neurons, activation=None):
        self.n_neurons = n_neurons
        self.activation = activation

    # Forward pass for this layer
    def forward(self, inputs, weights):
        """Return the layer output for ``inputs`` using ``weights``.

        Args:
            inputs: Tensor that can be matrix-multiplied with ``weights``.
            weights: Weight tensor for this layer.

        Returns:
            ``inputs @ weights`` with the configured activation applied
            element-wise (or unchanged when ``activation`` is ``None``).

        Raises:
            ValueError: If ``activation`` is not ``None`` and is not a
                recognised activation name.
        """
        z = inputs @ weights
        if self.activation is None:
            return z

        # The check is kept here (not in __init__) so that ``activation`` can
        # still be reassigned after construction, exactly as before.
        key = self.activation.lower() if isinstance(self.activation, str) else None
        activation_fn = self._ACTIVATIONS.get(key)
        if activation_fn is None:
            raise ValueError(f"Unknown activation function {self.activation}")
        return activation_fn(z)

# === MLP (Multi-Layer Perceptron) Class ===
class MLP:
    """Small fully connected network trained with hand-written gradient descent.

    Weights are plain tensors created with ``requires_grad=True``; gradients
    come from autograd and the update rule is ``w -= learning_rate * w.grad``.
    Layers have no bias term.

    Usage: construct with the data set, call ``add_layer`` once per layer,
    assign ``loss_function`` (for example ``model.MSE`` or
    ``model.Logcrossentropy``) and then call one of the ``*_train`` methods.

    Args:
        x: Input samples (anything ``torch.tensor`` accepts, or a tensor).
        y: Targets, same leading length as ``x``.
        learning_rate: Step size used by ``backward_propagation``.
    """

    def __init__(self, x, y, learning_rate=0.01):
        # Store input data and targets as float32 copies
        self.x = self._as_float_tensor(x)
        self.y = self._as_float_tensor(y)
        self.learning_rate = learning_rate
        self.layers = []
        self.weights = []
        self.loss_function = None  # Define this before training

    @staticmethod
    def _as_float_tensor(data):
        """Return a detached float32 copy of ``data`` as a tensor."""
        if isinstance(data, torch.Tensor):
            # torch.tensor(existing_tensor) emits a copy-construct warning;
            # clone explicitly instead.
            return data.detach().clone().to(dtype=torch.float32)
        return torch.tensor(data, dtype=torch.float32)

    def _l2_penalty(self):
        """Return the sum of squared entries over all weight tensors.

        Python's ``sum`` is used so an empty weight list yields 0 instead of
        raising.
        """
        return sum(torch.sum(w ** 2) for w in self.weights)

    def _validation_loss(self, x_val, y_val):
        """Return the unregularised loss on the validation data as a float."""
        # No gradients are needed for monitoring, so skip building the graph.
        with torch.no_grad():
            return self.loss_function(self.forward(x_val), y_val).item()

    # Softmax activation function (with numerical stabilization)
    def softmax(self, res):
        """Return the softmax of ``res`` along dimension 1.

        The per-row maximum is subtracted before exponentiating so that
        ``exp`` cannot overflow.
        """
        shifted = res - torch.max(res, dim=1, keepdim=True).values
        exp_shifted = torch.exp(shifted)
        return exp_shifted / torch.sum(exp_shifted, dim=1, keepdim=True)

    # Add a layer to the network
    def add_layer(self, shape, n_neurons, activation=None):
        """Append a layer and its weight matrix.

        Args:
            shape: Number of rows of the weight matrix, i.e. the size of the
                last dimension of this layer's input.
            n_neurons: Number of columns of the weight matrix (layer width).
            activation: Activation name forwarded to ``Layer``.
        """
        self.layers.append(Layer(n_neurons, activation))
        # torch.rand draws from the uniform distribution on [0, 1).
        self.weights.append(torch.rand(shape, n_neurons, requires_grad=True))

    # Forward pass through all layers
    def forward(self, x):
        """Feed ``x`` through every layer in order and return the final output."""
        out = x
        for layer, weight in zip(self.layers, self.weights):
            out = layer.forward(out, weight)
        return out

    # Mean Squared Error (MSE) loss with optional L2 regularization
    def MSE(self, ypred, ytrue, l=None):
        """Return ``0.5 * mean((ypred - ytrue) ** 2)``, optionally L2-regularised.

        Args:
            ypred: Predicted values.
            ytrue: Target values, broadcastable against ``ypred``.
            l: L2 coefficient. When not ``None``, ``l`` times the sum of
                squared weights is added, as in ``Logcrossentropy``.
        """
        loss = 0.5 * torch.mean((ypred - ytrue) ** 2)
        if l is not None:
            loss = loss + l * self._l2_penalty()
        return loss

    # Cross-Entropy Loss with optional L2 regularization
    def Logcrossentropy(self, logits, ytrue, l=None):
        """Return the mean cross-entropy between ``softmax(logits)`` and ``ytrue``.

        Args:
            logits: Raw scores, one row per sample (softmax is taken over
                dimension 1).
            ytrue: Target distribution per row, same shape as ``logits``.
            l: L2 coefficient. When not ``None``, ``l`` times the sum of
                squared weights is added.
        """
        # log_softmax evaluates log(softmax(x)) in one stable step, so no
        # epsilon is needed and the gradient survives extreme logits.
        log_probs = torch.log_softmax(logits, dim=1)
        loss = -torch.mean(torch.sum(ytrue * log_probs, dim=1))
        if l is not None:
            loss = loss + l * self._l2_penalty()
        return loss

    # Split data into training and validation sets (70/30 split)
    def split_data(self):
        """Shuffle the stored data and split it 70/30.

        Returns:
            ``(x_train, y_train, x_val, y_val)``.
        """
        order = torch.randperm(len(self.x))
        cut = int(len(self.x) * 0.7)
        x_shuffled, y_shuffled = self.x[order], self.y[order]
        return x_shuffled[:cut], y_shuffled[:cut], x_shuffled[cut:], y_shuffled[cut:]

    # Perform k-fold cross-validation split
    def cross_validation(self, k):
        """Shuffle the stored data and cut it into ``k`` equally sized folds.

        Every fold holds ``len(self.x) // k`` samples; when the data set size
        is not a multiple of ``k`` the leftover samples are not placed in any
        fold.

        Returns:
            ``(x_folds, y_folds)``, two lists of ``k`` tensors each.
        """
        fold_len = len(self.x) // k
        order = torch.randperm(len(self.x))
        x_shuffled, y_shuffled = self.x[order], self.y[order]
        bounds = [(fold_len * i, fold_len * (i + 1)) for i in range(k)]
        x_folds = [x_shuffled[lo:hi] for lo, hi in bounds]
        y_folds = [y_shuffled[lo:hi] for lo, hi in bounds]
        return x_folds, y_folds

    # Backward pass and weight update
    def backward_propagation(self, loss):
        """Backpropagate ``loss`` and take one gradient-descent step.

        Gradients are reset to zero after the step so they do not accumulate
        across calls.
        """
        loss.backward()
        with torch.no_grad():
            for w in self.weights:
                if w.grad is None:
                    # This weight did not contribute to the loss.
                    continue
                w -= self.learning_rate * w.grad
                w.grad.zero_()

    # Batch Gradient Descent training
    def batch_gd_train(self, epochs, x_train, y_train, x_val, y_val, l2=False, l=0.1):
        """Train with one full-batch update per epoch.

        Args:
            epochs: Number of updates to perform.
            x_train, y_train: Training inputs and targets.
            x_val, y_val: Validation inputs and targets.
            l2: Whether to add the L2 penalty to the training loss.
            l: L2 coefficient used when ``l2`` is true.

        Returns:
            ``(losses, val_losses)``: per-epoch training and validation
            losses as floats. Both are measured with the weights as they
            stand before that epoch's update.

        Raises:
            ValueError: If ``loss_function`` has not been set.
        """
        penalty = l if l2 else None
        losses, val_losses = [], []
        for _ in range(epochs):
            if self.loss_function is None:
                raise ValueError("loss_function not defined")
            # Evaluate validation first so it sees the same weights as the
            # training loss recorded for this epoch.
            val_losses.append(self._validation_loss(x_val, y_val))
            loss = self.loss_function(self.forward(x_train), y_train, penalty)
            self.backward_propagation(loss)
            losses.append(loss.item())
        return losses, val_losses

    def _train_minibatches(self, epochs, x_train, y_train, x_val, y_val, batch_size, penalty):
        """Shared epoch loop for mini-batch and per-sample training."""
        n_samples = len(x_train)
        if n_samples == 0:
            raise ValueError("x_train is empty")
        if batch_size < 1:
            raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

        losses, val_losses = [], []
        for _ in range(epochs):
            if self.loss_function is None:
                raise ValueError("loss_function not defined")
            order = torch.randperm(n_samples)
            x_train, y_train = x_train[order], y_train[order]

            weighted_loss = 0.0
            # Stepping by batch_size keeps the shorter final batch, so every
            # sample is used and batch_size > n_samples gives one full batch.
            for start in range(0, n_samples, batch_size):
                x_batch = x_train[start:start + batch_size]
                y_batch = y_train[start:start + batch_size]
                loss = self.loss_function(self.forward(x_batch), y_batch, penalty)
                self.backward_propagation(loss)
                weighted_loss += loss.item() * len(x_batch)

            losses.append(weighted_loss / n_samples)
            val_losses.append(self._validation_loss(x_val, y_val))
        return losses, val_losses

    # Mini-Batch Stochastic Gradient Descent training
    def minibatch_SGD_train(self, epochs, x_train, y_train, x_val, y_val, batch_size, l2=False, l=0.1):
        """Train with shuffled mini-batches.

        The training data is reshuffled every epoch and consumed in chunks of
        ``batch_size``; a shorter final chunk is used as well.

        Args:
            epochs: Number of passes over the training data.
            x_train, y_train: Training inputs and targets.
            x_val, y_val: Validation inputs and targets.
            batch_size: Number of samples per update (at least 1).
            l2: Whether to add the L2 penalty to the training loss.
            l: L2 coefficient used when ``l2`` is true.

        Returns:
            ``(losses, val_losses)``: per epoch, the sample-weighted mean of
            the mini-batch training losses and the validation loss measured
            after the epoch's updates.

        Raises:
            ValueError: If ``loss_function`` has not been set, ``x_train`` is
                empty, or ``batch_size`` is smaller than 1.
        """
        return self._train_minibatches(
            epochs, x_train, y_train, x_val, y_val, batch_size, l if l2 else None
        )

    # Stochastic Gradient Descent (SGD) training
    def SGD_train(self, epochs, x_train, y_train, x_val, y_val, l2=False, l=0.1):
        """Train with one update per sample (mini-batches of size 1).

        Args:
            epochs: Number of passes over the training data.
            x_train, y_train: Training inputs and targets.
            x_val, y_val: Validation inputs and targets.
            l2: Whether to add the L2 penalty to the training loss.
            l: L2 coefficient used when ``l2`` is true.

        Returns:
            ``(losses, val_losses)``: per epoch, the mean per-sample training
            loss and the validation loss measured after the epoch's updates.

        Raises:
            ValueError: If ``loss_function`` has not been set or ``x_train``
                is empty.
        """
        return self._train_minibatches(
            epochs, x_train, y_train, x_val, y_val, 1, l if l2 else None
        )

    # Compute RMSE metric
    def evaluate_rmse(self, ypred, ytrue):
        """Return the root-mean-square error between ``ypred`` and ``ytrue``."""
        squared_error = (ypred - ytrue) ** 2
        return torch.sqrt(torch.mean(squared_error))

    # Plot loss curves
    def plot_loss(self, losses, val_losses, epochs):
        """Plot training and validation loss against the epoch index.

        ``losses`` and ``val_losses`` are expected to hold ``epochs`` values
        each, since both are drawn against ``range(epochs)``.
        """
        epoch_axis = range(epochs)
        plt.plot(epoch_axis, losses, label="train_loss")
        plt.plot(epoch_axis, val_losses, label="val_loss")
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.title('Loss evolution')
        plt.legend()
        plt.show()

    # Compute classification accuracy
    def accuracy(self, ypred, ytrue):
        """Return the fraction of rows whose argmax matches between inputs.

        Both arguments are reduced with ``argmax`` over dimension 1, so
        ``ytrue`` is expected in the same per-class layout as ``ypred``.
        """
        hits = torch.argmax(ypred, dim=1) == torch.argmax(ytrue, dim=1)
        return hits.float().mean()
